From f722fd937db2f3cacf1947d538c66528fd16eb89 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 1 Jun 2008 22:41:40 +1000 Subject: nv50: import current "state of the art" nv50 code --- src/gallium/drivers/nv50/nv50_program.c | 742 ++++++++++++++++++++++++++++++++ 1 file changed, 742 insertions(+) create mode 100644 src/gallium/drivers/nv50/nv50_program.c (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c new file mode 100644 index 0000000000..30953b7d8a --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -0,0 +1,742 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv50_context.h" +#include "nv50_state.h" + +#define OP_MOV 0x001 +#define OP_RCP 0x009 +#define OP_ADD 0x00b +#define OP_MUL 0x00c +#define NV50_SU_MAX_TEMP 64 + +struct nv50_reg { + enum { + P_TEMP, + P_ATTR, + P_RESULT, + P_CONST, + P_IMMD + } type; + int index; + + int hw; +}; + +struct nv50_pc { + struct nv50_program *p; + + /* hw resources */ + struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + + /* tgsi resources */ + struct nv50_reg *temp; + int temp_nr; + struct nv50_reg *attr; + int attr_nr; + struct nv50_reg *result; + int result_nr; + struct nv50_reg *param; + int param_nr; + struct nv50_reg *immd; + float *immd_buf; + int immd_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ + int i; + + if (reg->type != P_TEMP || reg->hw >= 0) + return; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!(pc->r_temp[i])) { + pc->r_temp[i] = reg; + reg->hw = i; + if (pc->p->cfg.vp.high_temp < (i + 1)) + pc->p->cfg.vp.high_temp = i + 1; + return; + } + } + + assert(0); +} + +static struct nv50_reg * +alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) +{ + struct nv50_reg *r; + int i; + + if (dst && dst->type == P_TEMP && dst->hw == -1) + return dst; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!pc->r_temp[i]) { + r = CALLOC_STRUCT(nv50_reg); + r->type = P_TEMP; + r->index = -1; + r->hw = i; + pc->r_temp[i] = r; + return r; + } + } + + assert(0); + return NULL; +} + +static void +free_temp(struct nv50_pc *pc, struct nv50_reg *r) +{ + if (r->index == -1) { + FREE(pc->r_temp[r->hw]); + pc->r_temp[r->hw] = NULL; + } +} + +#if 0 +static struct nv50_reg * +constant(struct nv50_pc *pc, int pipe, int c, float v) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + struct nv50_program *p = pc->p; + struct nv50_program_data *pd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < p->nr_consts; idx++) { + if (p->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = p->nr_consts++; + p->consts = realloc(p->consts, sizeof(*pd) * p->nr_consts); + pd = &p->consts[idx]; + + pd->index = pipe; + pd->component = c; + pd->value = v; + return nv40_sr(NV40SR_CONST, idx); +} +#endif + +static void +emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program *p = pc->p; + struct nv50_reg *tmp = NULL, *tmp2 = NULL; + unsigned inst[2] = { 0, 0 }; + + /* Grr.. Fun restrictions on where attribs can be sourced from.. */ + if (src1 && src1->type == P_ATTR) { + tmp = alloc_temp(pc, dst); + emit(pc, 1, tmp, src1, NULL, NULL); + src1 = tmp; + } + + if (src2 && src2->type == P_ATTR) { + tmp2 = alloc_temp(pc, dst); + emit(pc, 1, tmp2, src2, NULL, NULL); + src2 = tmp2; + } + + /* Get this out of the way first. What type of opcode do we + * want/need to build? + */ + if ((op & 0x3f0) || dst->type == P_RESULT || + (src0 && src0->type == P_ATTR) || src1 || src2) + inst[0] |= 0x00000001; + + if (inst[0] & 0x00000001) { + inst[0] |= ((op & 0xf) << 28); + inst[1] |= ((op >> 4) << 26); + + alloc_reg(pc, dst); + if (dst->type == P_RESULT) + inst[1] |= 0x00000008; + inst[0] |= (dst->hw << 2); + + if (src0) { + if (src0->type == P_ATTR) + inst[1] |= 0x00200000; + else + if (src0->type == P_CONST || src0->type == P_IMMD) + assert(0); + alloc_reg(pc, src0); + inst[0] |= (src0->hw << 9); + } + + if (src1) { + if (src1->type == P_CONST || src1->type == P_IMMD) { + inst[0] |= 0x00800000; /* src1 is const */ + /*XXX: does src1 come from "src2" now? */ + alloc_reg(pc, src1); + inst[0] |= (src1->hw << 16); + } else { + alloc_reg(pc, src1); + if (op == 0xc || op == 0xe) + inst[0] |= (src1->hw << 16); + else + inst[1] |= (src1->hw << 14); + } + } else { + inst[1] |= 0x0003c000; /*XXX FIXME */ + } + + if (src2) { + if (src2->type == P_CONST || src2->type == P_IMMD) { + inst[0] |= 0x01000000; /* src2 is const */ + inst[1] |= (src2->hw << 14); + } else { + alloc_reg(pc, src2); + if (inst[0] & 0x00800000 || op ==0xe) + inst[1] |= (src2->hw << 14); + else + inst[0] |= (src2->hw << 16); + } + } + + /*XXX: FIXME */ + if (op == 0xb || op == 0xc || op == 0x9 || op == 0xe) { + /* 0x04000000 negates arg0 */ + /* 0x08000000 negates arg1 */ + /*XXX: true for !0xb also ? */ + inst[1] |= 0x00000780; + } else { + /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ + inst[1] |= 0x04000780; + } + } else { + inst[0] |= ((op & 0xf) << 28); + + alloc_reg(pc, dst); + inst[0] |= (dst->hw << 2); + + if (src0) { + alloc_reg(pc, src0); + inst[0] |= (src0->hw << 9); + } + + /*XXX: NFI if this even works - probably not.. */ + if (src1) { + alloc_reg(pc, src1); + inst[0] |= (src1->hw << 16); + } + } + + if (tmp) free_temp(pc, tmp); + if (tmp2) free_temp(pc, tmp2); + + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) +{ + /* Handle swizzling */ + switch (c) { + case 0: c = src->SrcRegister.SwizzleX; break; + case 1: c = src->SrcRegister.SwizzleY; break; + case 2: c = src->SrcRegister.SwizzleZ; break; + case 3: c = src->SrcRegister.SwizzleW; break; + default: + assert(0); + } + + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + return &pc->attr[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_TEMPORARY: + return &pc->temp[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_CONSTANT: + return &pc->param[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_IMMEDIATE: + return &pc->immd[src->SrcRegister.Index * 4 + c]; + default: + break; + } + + return NULL; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ + const struct tgsi_full_instruction *inst = &tok->FullInstruction; + struct nv50_reg *dst[4], *src[3][4], *none = NULL, *tmp; + unsigned mask; + int i, c; + + NOUVEAU_ERR("insn %p\n", tok); + + mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + else + dst[c] = NULL; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + for (c = 0; c < 4; c++) + src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) { + emit(pc, 0x0b, dst[c], + src[0][c], src[1][c], none); + } + } + break; + case TGSI_OPCODE_DP3: + tmp = alloc_temp(pc, NULL); + emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); + emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); + emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; + case TGSI_OPCODE_DP4: + tmp = alloc_temp(pc, NULL); + emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); + emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); + emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + emit(pc, 0x0e, tmp, src[0][3], src[1][3], tmp); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x0e, dst[c], + src[0][c], src[1][c], src[2][c]); + } + break; + case TGSI_OPCODE_MOV: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], src[0][c], none, none); + } + break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x0c, dst[c], + src[0][c], src[1][c], none); + } + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x09, dst[c], + src[0][c], none, none); + } + break; + case TGSI_OPCODE_END: + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context p; + boolean ret = FALSE; + unsigned i, c; + + tgsi_parse_init(&p, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm = + &p.FullToken.FullImmediate; + + pc->immd_nr++; + pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * + sizeof(float)); + pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = + imm->u.ImmediateFloat32[0].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = + imm->u.ImmediateFloat32[1].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = + imm->u.ImmediateFloat32[2].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = + imm->u.ImmediateFloat32[3].Float; + } + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *d; + unsigned last; + + d = &p.FullToken.FullDeclaration; + last = d->u.DeclarationRange.Last; + + switch (d->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (pc->temp_nr < (last + 1)) + pc->temp_nr = last + 1; + break; + case TGSI_FILE_OUTPUT: + if (pc->result_nr < (last + 1)) + pc->result_nr = last + 1; + break; + case TGSI_FILE_INPUT: + if (pc->attr_nr < (last + 1)) + pc->attr_nr = last + 1; + break; + case TGSI_FILE_CONSTANT: + if (pc->param_nr < (last + 1)) + pc->param_nr = last + 1; + break; + default: + NOUVEAU_ERR("bad decl file %d\n", + d->Declaration.File); + goto out_err; + } + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + break; + default: + break; + } + } + + NOUVEAU_ERR("%d temps\n", pc->temp_nr); + if (pc->temp_nr) { + pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); + if (!pc->temp) + goto out_err; + + for (i = 0; i < pc->temp_nr; i++) { + for (c = 0; c < 4; c++) { + pc->temp[i*4+c].type = P_TEMP; + pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); + if (pc->attr_nr) { + int aid = 0; + + pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); + if (!pc->attr) + goto out_err; + + for (i = 0; i < pc->attr_nr; i++) { + for (c = 0; c < 4; c++) { + pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d result regs\n", pc->result_nr); + if (pc->result_nr) { + int rid = 0; + + pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); + if (!pc->result) + goto out_err; + + for (i = 0; i < pc->result_nr; i++) { + for (c = 0; c < 4; c++) { + pc->result[i*4+c].type = P_RESULT; + pc->result[i*4+c].hw = rid++; + pc->result[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d param regs\n", pc->param_nr); + if (pc->param_nr) { + int rid = 0; + + pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); + if (!pc->param) + goto out_err; + + for (i = 0; i < pc->param_nr; i++) { + for (c = 0; c < 4; c++) { + pc->param[i*4+c].type = P_CONST; + pc->param[i*4+c].hw = rid++; + pc->param[i*4+c].index = i; + } + } + } + + if (pc->immd_nr) { + int rid = pc->param_nr * 4; + + pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); + if (!pc->immd) + goto out_err; + + for (i = 0; i < pc->immd_nr; i++) { + for (c = 0; c < 4; c++) { + pc->immd[i*4+c].type = P_IMMD; + pc->immd[i*4+c].hw = rid++; + pc->immd[i*4+c].index = i; + } + } + } + + ret = TRUE; +out_err: + tgsi_parse_free(&p); + return ret; +} + +static boolean +nv50_program_tx(struct nv50_program *p) +{ + struct tgsi_parse_context parse; + struct nv50_pc *pc; + boolean ret; + + pc = CALLOC_STRUCT(nv50_pc); + if (!pc) + return FALSE; + pc->p = p; + pc->p->cfg.vp.high_temp = 4; + + ret = nv50_program_tx_prep(pc); + if (ret == FALSE) + goto out_cleanup; + + tgsi_parse_init(&parse, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + const union tgsi_full_token *tok = &parse.FullToken; + + tgsi_parse_token(&parse); + + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + ret = nv50_program_tx_insn(pc, tok); + if (ret == FALSE) + goto out_err; + break; + default: + break; + } + } + + p->param_nr = pc->param_nr * 4; + p->immd_nr = pc->immd_nr * 4; + p->immd = pc->immd_buf; + +out_err: + tgsi_parse_free(&parse); + +out_cleanup: + return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ + struct tgsi_parse_context pc; + + tgsi_parse_init(&pc, p->pipe.tokens); + + if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + p->insns_nr = 8; + p->insns = malloc(p->insns_nr * sizeof(unsigned)); + p->insns[0] = 0x80000000; + p->insns[1] = 0x9000000c; + p->insns[2] = 0x82010600; + p->insns[3] = 0x82020604; + p->insns[4] = 0x80030609; + p->insns[5] = 0x00020780; + p->insns[6] = 0x8004060d; + p->insns[7] = 0x00020781; + } else + if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + int i; + + if (nv50_program_tx(p) == FALSE) + assert(0); + p->insns[p->insns_nr - 1] |= 0x00000001; + + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); + } else { + NOUVEAU_ERR("invalid TGSI processor\n"); + tgsi_parse_free(&pc); + return; + } + + tgsi_parse_free(&pc); + + p->translated = TRUE; +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->vertprog; + struct nouveau_stateobj *so; + void *map; + int i; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); + + if (p->param_nr) { + float *cb; + + cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < p->param_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING (i << 8); + OUT_RING (fui(cb[i])); + } + ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]); + } + + + for (i = 0; i < p->immd_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((p->param_nr + i) << 8); + OUT_RING (fui(p->immd[i])); + } + + so = so_new(11, 2); + so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1650, 2); + so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16ac, 2); + so_data (so, 8); + so_data (so, p->cfg.vp.high_temp); + so_method(so, tesla, 0x140c, 1); + so_data (so, 0); /* program start offset */ + so_emit(nv50->screen->nvws, so); + so_ref(NULL, &so); +} + +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->fragprog; + struct nouveau_stateobj *so; + void *map; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); + + so = so_new(3, 2); + so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_emit(nv50->screen->nvws, so); + so_ref(NULL, &so); +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + + if (p->insns_nr) { + if (p->insns) + FREE(p->insns); + p->insns_nr = 0; + } + + if (p->buffer) + pipe_buffer_reference(ws, &p->buffer, NULL); + + p->translated = 0; +} + -- cgit v1.2.3 From 716c1cd2ecbc1e86c0fd747c9fa9e095ded5fd5d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 1 Jun 2008 23:10:31 +1000 Subject: nv50: use "real" constbufs for shaders + tcb uploads --- src/gallium/drivers/nv50/nv50_context.h | 14 ++++++- src/gallium/drivers/nv50/nv50_program.c | 27 +++++------- src/gallium/drivers/nv50/nv50_screen.c | 57 +++++++++++++++----------- src/gallium/drivers/nv50/nv50_screen.h | 3 ++ src/gallium/drivers/nv50/nv50_state.c | 4 +- src/gallium/drivers/nv50/nv50_state.h | 1 - src/gallium/drivers/nv50/nv50_state_validate.c | 29 +++++++++++++ 7 files changed, 90 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index d4d716b94b..c4a8a4c064 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -23,6 +23,14 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 + #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) #define NV50_NEW_BLEND_COLOUR (1 << 2) @@ -32,8 +40,10 @@ #define NV50_NEW_RASTERIZER (1 << 6) #define NV50_NEW_FRAMEBUFFER (1 << 7) #define NV50_NEW_VERTPROG (1 << 8) -#define NV50_NEW_FRAGPROG (1 << 9) -#define NV50_NEW_ARRAYS (1 << 10) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) struct nv50_blend_stateobj { struct pipe_blend_state pipe; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 30953b7d8a..0a43646923 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -179,6 +179,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src1) { if (src1->type == P_CONST || src1->type == P_IMMD) { + if (src1->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x00800000; /* src1 is const */ /*XXX: does src1 come from "src2" now? */ alloc_reg(pc, src1); @@ -196,6 +200,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src2) { if (src2->type == P_CONST || src2->type == P_IMMD) { + if (src2->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x01000000; /* src2 is const */ inst[1] |= (src2->hw << 14); } else { @@ -526,7 +534,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = pc->param_nr * 4; + int rid = 0; pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -581,7 +589,6 @@ nv50_program_tx(struct nv50_program *p) } } - p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -654,23 +661,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) memcpy(map, p->insns, p->insns_nr * 4); ws->buffer_unmap(ws, p->buffer); - if (p->param_nr) { - float *cb; - - cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < p->param_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING (i << 8); - OUT_RING (fui(cb[i])); - } - ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]); - } - - for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((p->param_nr + i) << 8); + OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); OUT_RING (fui(p->immd[i])); } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index fc3eeed913..6c0810a9cf 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -203,14 +203,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) return NULL; } - /* Static constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { - NOUVEAU_ERR("Error initialising constant buffer\n"); - nv50_screen_destroy(&screen->pipe); - return NULL; - } - /* Static tesla init */ so = so_new(256, 20); @@ -245,37 +237,56 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00001000); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * blocks. At some point we *may* want to go the NVIDIA way of doing + * things? + */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00014000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00024000); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00034000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00040100); + so_data (so, 0x00000800); + + /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { so_method(so, screen->tesla, 0x1080 + (i * 8), 2); so_data (so, 0x000000ff); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index d63dd48508..5acb5003ba 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -15,6 +15,9 @@ struct nv50_screen { struct pipe_buffer *constbuf; struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index fd10a38378..ba3d04cede 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -406,11 +406,11 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (shader == PIPE_SHADER_VERTEX) { nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; } } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index be0c75ad6e..9e3876871b 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -20,7 +20,6 @@ struct nv50_program { struct pipe_buffer *buffer; - unsigned param_nr; float *immd; unsigned immd_nr; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4a548378b7..05395c6df7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -168,6 +168,35 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_VERTPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PVP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_FRAGPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PFP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From 41cd9bddf77ea60f84a957e83ddf098818c95c41 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 1 Jun 2008 23:16:17 +1000 Subject: nv50: fucking horrible hack, I really hate G8x shaders.. --- src/gallium/drivers/nv50/nv50_program.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0a43646923..1b192b897c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -135,10 +135,18 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { struct nv50_program *p = pc->p; - struct nv50_reg *tmp = NULL, *tmp2 = NULL; + struct nv50_reg *tmp0 = NULL, *tmp = NULL, *tmp2 = NULL; unsigned inst[2] = { 0, 0 }; /* Grr.. Fun restrictions on where attribs can be sourced from.. */ + if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && + (op == 0xc || op == 0xe)) { + tmp = src1; + src1 = src0; + src0 = tmp; + tmp = NULL; + } + if (src1 && src1->type == P_ATTR) { tmp = alloc_temp(pc, dst); emit(pc, 1, tmp, src1, NULL, NULL); @@ -243,6 +251,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, } } + if (tmp0) free_temp(pc, tmp0); if (tmp) free_temp(pc, tmp); if (tmp2) free_temp(pc, tmp2); -- cgit v1.2.3 From 38ce697e5942550888c28bd4859ca2a92f247bf7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 2 Jun 2008 12:08:06 +1000 Subject: nv50: implement SUB --- src/gallium/drivers/nv50/nv50_program.c | 63 +++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1b192b897c..41ff05dabe 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,9 +11,11 @@ #include "nv50_state.h" #define OP_MOV 0x001 +#define OP_INTERP 0x008 #define OP_RCP 0x009 #define OP_ADD 0x00b #define OP_MUL 0x00c +#define OP_MAD 0x00e #define NV50_SU_MAX_TEMP 64 struct nv50_reg { @@ -27,6 +29,7 @@ struct nv50_reg { int index; int hw; + int neg; }; struct nv50_pc { @@ -140,7 +143,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, /* Grr.. Fun restrictions on where attribs can be sourced from.. */ if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && - (op == 0xc || op == 0xe)) { + (op == OP_MUL || op == OP_MAD)) { tmp = src1; src1 = src0; src0 = tmp; @@ -149,12 +152,14 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src1 && src1->type == P_ATTR) { tmp = alloc_temp(pc, dst); + tmp->neg = src1->neg; src1->neg = 0; emit(pc, 1, tmp, src1, NULL, NULL); src1 = tmp; } if (src2 && src2->type == P_ATTR) { tmp2 = alloc_temp(pc, dst); + tmp2->neg = src2->neg; src2->neg = 0; emit(pc, 1, tmp2, src2, NULL, NULL); src2 = tmp2; } @@ -197,7 +202,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, inst[0] |= (src1->hw << 16); } else { alloc_reg(pc, src1); - if (op == 0xc || op == 0xe) + if (op == OP_MUL || op == OP_MAD) inst[0] |= (src1->hw << 16); else inst[1] |= (src1->hw << 14); @@ -216,7 +221,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, inst[1] |= (src2->hw << 14); } else { alloc_reg(pc, src2); - if (inst[0] & 0x00800000 || op ==0xe) + if (inst[0] & 0x00800000 || op == OP_MAD) inst[1] |= (src2->hw << 14); else inst[0] |= (src2->hw << 16); @@ -224,14 +229,24 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, } /*XXX: FIXME */ - if (op == 0xb || op == 0xc || op == 0x9 || op == 0xe) { + switch (op) { + case OP_ADD: + case OP_MUL: + case OP_RCP: + case OP_MAD: /* 0x04000000 negates arg0 */ /* 0x08000000 negates arg1 */ /*XXX: true for !0xb also ? */ + if (src0 && src0->neg) + inst[1] |= 0x04000000; + if (src1 && src1->neg) + inst[1] |= 0x08000000; inst[1] |= 0x00000780; - } else { + break; + default: /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ inst[1] |= 0x04000780; + break; } } else { inst[0] |= ((op & 0xf) << 28); @@ -340,61 +355,71 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_ADD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) { - emit(pc, 0x0b, dst[c], + emit(pc, OP_ADD, dst[c], src[0][c], src[1][c], none); } } break; case TGSI_OPCODE_DP3: tmp = alloc_temp(pc, NULL); - emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); - emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); - emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], tmp, none, none); + emit(pc, OP_MOV, dst[c], tmp, none, none); } free_temp(pc, tmp); break; case TGSI_OPCODE_DP4: tmp = alloc_temp(pc, NULL); - emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); - emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); - emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); - emit(pc, 0x0e, tmp, src[0][3], src[1][3], tmp); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_MAD, tmp, src[0][3], src[1][3], tmp); for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], tmp, none, none); + emit(pc, OP_MOV, dst[c], tmp, none, none); } free_temp(pc, tmp); break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x0e, dst[c], + emit(pc, OP_MAD, dst[c], src[0][c], src[1][c], src[2][c]); } break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], src[0][c], none, none); + emit(pc, OP_MOV, dst[c], src[0][c], none, none); } break; case TGSI_OPCODE_MUL: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x0c, dst[c], + emit(pc, OP_MUL, dst[c], src[0][c], src[1][c], none); } break; case TGSI_OPCODE_RCP: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x09, dst[c], + emit(pc, OP_RCP, dst[c], src[0][c], none, none); } break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) { + src[1][c]->neg = 1; + emit(pc, OP_ADD, dst[c], + src[0][c], src[1][c], none); + src[1][c]->neg = 0; + } + } + break; case TGSI_OPCODE_END: break; default: -- cgit v1.2.3 From 207b7974723c6b88aacfa3703a1e049ff35db6a8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 2 Jun 2008 12:12:16 +1000 Subject: nv50: DPH --- src/gallium/drivers/nv50/nv50_program.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 41ff05dabe..12bd419f4c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -383,6 +383,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, tmp); break; + case TGSI_OPCODE_DPH: + tmp = alloc_temp(pc, NULL); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_ADD, tmp, src[1][3], tmp, NULL); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, OP_MOV, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) -- cgit v1.2.3 From 22e0acc466947b203574c88f4964f61ef46ae3fd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 2 Jun 2008 13:01:09 +1000 Subject: nv50: split code/data upload out, fp will use it later on --- src/gallium/drivers/nv50/nv50_program.c | 42 +++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 12bd419f4c..97e3eab906 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -684,16 +684,37 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) p->translated = TRUE; } +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ + int i; + + for (i = 0; i < p->immd_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); + OUT_RING (fui(p->immd[i])); + } +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + void *map; + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); +} + void nv50_vertprog_validate(struct nv50_context *nv50) { - struct pipe_winsys *ws = nv50->pipe.winsys; - struct nouveau_winsys *nvws = nv50->screen->nvws; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->vertprog; struct nouveau_stateobj *so; - void *map; - int i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -701,17 +722,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) assert(0); } - if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); - ws->buffer_unmap(ws, p->buffer); - - for (i = 0; i < p->immd_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); - OUT_RING (fui(p->immd[i])); - } + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); so = so_new(11, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); -- cgit v1.2.3 From 55b2fe1047b37d0d86641a252e1c745111030393 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 2 Jun 2008 23:52:58 +1000 Subject: nv50: drop the majority of the old shader code, reimplement, only MOV so far. --- src/gallium/drivers/nv50/nv50_program.c | 411 ++++++++++++-------------------- 1 file changed, 146 insertions(+), 265 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 97e3eab906..aa848faa49 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -10,13 +10,8 @@ #include "nv50_context.h" #include "nv50_state.h" -#define OP_MOV 0x001 -#define OP_INTERP 0x008 -#define OP_RCP 0x009 -#define OP_ADD 0x00b -#define OP_MUL 0x00c -#define OP_MAD 0x00e #define NV50_SU_MAX_TEMP 64 +#define TX_FRAGPROG 0 struct nv50_reg { enum { @@ -106,181 +101,6 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } -#if 0 -static struct nv50_reg * -constant(struct nv50_pc *pc, int pipe, int c, float v) -{ - struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); - struct nv50_program *p = pc->p; - struct nv50_program_data *pd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < p->nr_consts; idx++) { - if (p->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); - } - } - - idx = p->nr_consts++; - p->consts = realloc(p->consts, sizeof(*pd) * p->nr_consts); - pd = &p->consts[idx]; - - pd->index = pipe; - pd->component = c; - pd->value = v; - return nv40_sr(NV40SR_CONST, idx); -} -#endif - -static void -emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, - struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) -{ - struct nv50_program *p = pc->p; - struct nv50_reg *tmp0 = NULL, *tmp = NULL, *tmp2 = NULL; - unsigned inst[2] = { 0, 0 }; - - /* Grr.. Fun restrictions on where attribs can be sourced from.. */ - if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && - (op == OP_MUL || op == OP_MAD)) { - tmp = src1; - src1 = src0; - src0 = tmp; - tmp = NULL; - } - - if (src1 && src1->type == P_ATTR) { - tmp = alloc_temp(pc, dst); - tmp->neg = src1->neg; src1->neg = 0; - emit(pc, 1, tmp, src1, NULL, NULL); - src1 = tmp; - } - - if (src2 && src2->type == P_ATTR) { - tmp2 = alloc_temp(pc, dst); - tmp2->neg = src2->neg; src2->neg = 0; - emit(pc, 1, tmp2, src2, NULL, NULL); - src2 = tmp2; - } - - /* Get this out of the way first. What type of opcode do we - * want/need to build? - */ - if ((op & 0x3f0) || dst->type == P_RESULT || - (src0 && src0->type == P_ATTR) || src1 || src2) - inst[0] |= 0x00000001; - - if (inst[0] & 0x00000001) { - inst[0] |= ((op & 0xf) << 28); - inst[1] |= ((op >> 4) << 26); - - alloc_reg(pc, dst); - if (dst->type == P_RESULT) - inst[1] |= 0x00000008; - inst[0] |= (dst->hw << 2); - - if (src0) { - if (src0->type == P_ATTR) - inst[1] |= 0x00200000; - else - if (src0->type == P_CONST || src0->type == P_IMMD) - assert(0); - alloc_reg(pc, src0); - inst[0] |= (src0->hw << 9); - } - - if (src1) { - if (src1->type == P_CONST || src1->type == P_IMMD) { - if (src1->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); - inst[0] |= 0x00800000; /* src1 is const */ - /*XXX: does src1 come from "src2" now? */ - alloc_reg(pc, src1); - inst[0] |= (src1->hw << 16); - } else { - alloc_reg(pc, src1); - if (op == OP_MUL || op == OP_MAD) - inst[0] |= (src1->hw << 16); - else - inst[1] |= (src1->hw << 14); - } - } else { - inst[1] |= 0x0003c000; /*XXX FIXME */ - } - - if (src2) { - if (src2->type == P_CONST || src2->type == P_IMMD) { - if (src2->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); - inst[0] |= 0x01000000; /* src2 is const */ - inst[1] |= (src2->hw << 14); - } else { - alloc_reg(pc, src2); - if (inst[0] & 0x00800000 || op == OP_MAD) - inst[1] |= (src2->hw << 14); - else - inst[0] |= (src2->hw << 16); - } - } - - /*XXX: FIXME */ - switch (op) { - case OP_ADD: - case OP_MUL: - case OP_RCP: - case OP_MAD: - /* 0x04000000 negates arg0 */ - /* 0x08000000 negates arg1 */ - /*XXX: true for !0xb also ? */ - if (src0 && src0->neg) - inst[1] |= 0x04000000; - if (src1 && src1->neg) - inst[1] |= 0x08000000; - inst[1] |= 0x00000780; - break; - default: - /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ - inst[1] |= 0x04000780; - break; - } - } else { - inst[0] |= ((op & 0xf) << 28); - - alloc_reg(pc, dst); - inst[0] |= (dst->hw << 2); - - if (src0) { - alloc_reg(pc, src0); - inst[0] |= (src0->hw << 9); - } - - /*XXX: NFI if this even works - probably not.. */ - if (src1) { - alloc_reg(pc, src1); - inst[0] |= (src1->hw << 16); - } - } - - if (tmp0) free_temp(pc, tmp0); - if (tmp) free_temp(pc, tmp); - if (tmp2) free_temp(pc, tmp2); - - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } -} - static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -327,11 +147,147 @@ tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) return NULL; } +static void +emit(struct nv50_pc *pc, unsigned *inst) +{ + struct nv50_program *p = pc->p; + + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } +} + +static INLINE void set_long(struct nv50_pc *, unsigned *); + +static boolean +is_long(unsigned *inst) +{ + if (inst[0] & 1) + return TRUE; + return FALSE; +} + +static boolean +is_immd(unsigned *inst) +{ + if (is_long(inst) && (inst[1] & 3) == 3) + return TRUE; + return FALSE; +} + +static INLINE void +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) +{ + set_long(pc, inst); + inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + inst[1] |= (pred << 7) | (idx << 12); +} + +static INLINE void +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) +{ + set_long(pc, inst); + inst[1] &= ~((0x3 << 4) | (1 << 6)); + inst[1] |= (idx << 4) | (on << 6); +} + +static INLINE void +set_long(struct nv50_pc *pc, unsigned *inst) +{ + if (is_long(inst)) + return; + + inst[0] |= 1; + set_pred(pc, 0xf, 0, inst); + set_pred_wr(pc, 0, 0, inst); +} + +static INLINE void +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) +{ + if (dst->type == P_RESULT) { + set_long(pc, inst); + inst[1] |= 0x00000008; + } + + alloc_reg(pc, dst); + inst[0] |= (dst->hw << 2); +} + +static INLINE void +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) +{ + unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + + set_long(pc, inst); + /*XXX: can't be predicated - bits overlap.. catch cases where both + * are required and avoid them. */ + set_pred(pc, 0, 0, inst); + set_pred_wr(pc, 0, 0, inst); + + inst[1] |= 0x00000002 | 0x00000001; + inst[0] |= (val & 0x3f) << 16; + inst[1] |= (val >> 6) << 2; +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + int i; + + inst[0] |= 0x10000000; + + set_dst(pc, dst, inst); + + if (dst->type != P_RESULT && src->type == P_IMMD) { + set_immd(pc, src, inst); + /*XXX: 32-bit, but steals part of "half" reg space - need to + * catch and handle this case if/when we do half-regs + */ + inst[0] |= 0x00008000; + } else + if (src->type == P_IMMD || src->type == P_CONST) { + set_long(pc, inst); + if (src->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); + inst[0] |= (src->hw << 9); + inst[1] |= 0x20000000; /* src0 const? */ + } else { + if (src->type == P_ATTR) { + set_long(pc, inst); + inst[1] |= 0x00200000; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 9); + } + + /* We really should support "half" instructions here at some point, + * but I don't feel confident enough about them yet. + */ + set_long(pc, inst); + if (is_long(inst) && !is_immd(inst)) { + inst[1] |= 0x04000000; /* 32-bit */ + inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + } + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4], *none = NULL, *tmp; + struct nv50_reg *dst[4], *src[3][4]; unsigned mask; int i, c; @@ -352,85 +308,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) { - emit(pc, OP_ADD, dst[c], - src[0][c], src[1][c], none); - } - } - break; - case TGSI_OPCODE_DP3: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_DP4: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - emit(pc, OP_MAD, tmp, src[0][3], src[1][3], tmp); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_DPH: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - emit(pc, OP_ADD, tmp, src[1][3], tmp, NULL); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_MAD: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MAD, dst[c], - src[0][c], src[1][c], src[2][c]); - } - break; case TGSI_OPCODE_MOV: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], src[0][c], none, none); - } - break; - case TGSI_OPCODE_MUL: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MUL, dst[c], - src[0][c], src[1][c], none); - } - break; - case TGSI_OPCODE_RCP: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_RCP, dst[c], - src[0][c], none, none); - } - break; - case TGSI_OPCODE_SUB: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) { - src[1][c]->neg = 1; - emit(pc, OP_ADD, dst[c], - src[0][c], src[1][c], none); - src[1][c]->neg = 0; - } - } + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], src[0][c]); break; case TGSI_OPCODE_END: break; @@ -648,6 +528,7 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { +#if TX_FRAGPROG == 0 struct tgsi_parse_context pc; tgsi_parse_init(&pc, p->pipe.tokens); @@ -665,6 +546,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) p->insns[7] = 0x00020781; } else if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { +#endif int i; if (nv50_program_tx(p) == FALSE) @@ -673,6 +555,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) for (i = 0; i < p->insns_nr; i++) NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); +#if TX_FRAGPROG == 0 } else { NOUVEAU_ERR("invalid TGSI processor\n"); tgsi_parse_free(&pc); @@ -680,6 +563,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) } tgsi_parse_free(&pc); +#endif p->translated = TRUE; } @@ -758,11 +642,8 @@ nv50_fragprog_validate(struct nv50_context *nv50) assert(0); } - if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); - ws->buffer_unmap(ws, p->buffer); + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); so = so_new(3, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); -- cgit v1.2.3 From 8ec6415e9fcf876c67bc1624f3eb7dd7624b7791 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 3 Jun 2008 12:37:29 +1000 Subject: nv50: start using interpreter for fragprog too, not hardcoded passthrough --- src/gallium/drivers/nv50/nv50_program.c | 96 ++++++++++++++++++++++++++++----- src/gallium/drivers/nv50/nv50_state.c | 2 + src/gallium/drivers/nv50/nv50_state.h | 8 ++- 3 files changed, 92 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index aa848faa49..9d89f6d060 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,7 +11,7 @@ #include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 -#define TX_FRAGPROG 0 +#define TX_FRAGPROG 1 struct nv50_reg { enum { @@ -52,15 +52,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { int i; - if (reg->type != P_TEMP || reg->hw >= 0) + if (reg->type != P_TEMP) return; + if (reg->hw >= 0) { + /*XXX: do this here too to catch FP temp-as-attr usage.. + * not clean, but works */ + if (pc->p->cfg.high_temp < (reg->hw + 1)) + pc->p->cfg.high_temp = reg->hw + 1; + return; + } + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { if (!(pc->r_temp[i])) { pc->r_temp[i] = reg; reg->hw = i; - if (pc->p->cfg.vp.high_temp < (i + 1)) - pc->p->cfg.vp.high_temp = i + 1; + if (pc->p->cfg.high_temp < (i + 1)) + pc->p->cfg.high_temp = i + 1; return; } } @@ -236,6 +244,24 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) inst[1] |= (val >> 6) << 2; } +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0x80000000; + set_dst(pc, dst, inst); + alloc_reg(pc, iv); + inst[0] |= (iv->hw << 9); + alloc_reg(pc, src); + inst[0] |= (src->hw << 16); + if (noperspective) + inst[0] |= (1 << 25); + + emit(pc, inst); +} + static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -409,20 +435,57 @@ nv50_program_tx_prep(struct nv50_pc *pc) NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { + struct nv50_reg *iv = NULL, *tmp = NULL; int aid = 0; pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); if (!pc->attr) goto out_err; + if (pc->p->type == NV50_PROG_FRAGMENT) { + iv = alloc_temp(pc, NULL); + aid++; + } + for (i = 0; i < pc->attr_nr; i++) { + struct nv50_reg *a = &pc->attr[i*4]; + for (c = 0; c < 4; c++) { - pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32)); - pc->attr[i*4+c].type = P_ATTR; - pc->attr[i*4+c].hw = aid++; - pc->attr[i*4+c].index = i; + if (pc->p->type == NV50_PROG_FRAGMENT) { + struct nv50_reg *at = + alloc_temp(pc, NULL); + pc->attr[i*4+c].type = at->type; + pc->attr[i*4+c].hw = at->hw; + pc->attr[i*4+c].index = at->index; + } else { + pc->p->cfg.vp.attr[aid/32] |= + (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } } + + if (pc->p->type != NV50_PROG_FRAGMENT) + continue; + + emit_interp(pc, iv, iv, iv, FALSE); + tmp = alloc_temp(pc, NULL); + { + unsigned inst[2] = { 0, 0 }; + inst[0] = 0x90000000; + inst[0] |= (tmp->hw << 2); + emit(pc, inst); + } + emit_interp(pc, &a[0], &a[0], tmp, TRUE); + emit_interp(pc, &a[1], &a[1], tmp, TRUE); + emit_interp(pc, &a[2], &a[2], tmp, TRUE); + emit_interp(pc, &a[3], &a[3], tmp, TRUE); + free_temp(pc, tmp); } + + if (iv) + free_temp(pc, iv); } NOUVEAU_ERR("%d result regs\n", pc->result_nr); @@ -435,7 +498,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - pc->result[i*4+c].type = P_RESULT; + if (pc->p->type == NV50_PROG_FRAGMENT) + pc->result[i*4+c].type = P_TEMP; + else + pc->result[i*4+c].type = P_RESULT; pc->result[i*4+c].hw = rid++; pc->result[i*4+c].index = i; } @@ -492,7 +558,7 @@ nv50_program_tx(struct nv50_program *p) if (!pc) return FALSE; pc->p = p; - pc->p->cfg.vp.high_temp = 4; + pc->p->cfg.high_temp = 4; ret = nv50_program_tx_prep(pc); if (ret == FALSE) @@ -551,6 +617,8 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) if (nv50_program_tx(p) == FALSE) assert(0); + /* *not* sufficient, it's fine if last inst is long and + * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; for (i = 0; i < p->insns_nr; i++) @@ -620,7 +688,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.vp.attr[1]); so_method(so, tesla, 0x16ac, 2); so_data (so, 8); - so_data (so, p->cfg.vp.high_temp); + so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); @@ -645,12 +713,16 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(3, 2); + so = so_new(7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x198c, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1414, 1); + so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); so_ref(NULL, &so); } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ba3d04cede..48d09c296b 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -331,6 +331,7 @@ nv50_vp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; + p->type = NV50_PROG_VERTEX; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } @@ -360,6 +361,7 @@ nv50_fp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; + p->type = NV50_PROG_FRAGMENT; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index 9e3876871b..dd352b6585 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -15,6 +15,10 @@ struct nv50_program { struct tgsi_shader_info info; boolean translated; + enum { + NV50_PROG_VERTEX, + NV50_PROG_FRAGMENT + } type; unsigned *insns; unsigned insns_nr; @@ -25,9 +29,9 @@ struct nv50_program { struct nouveau_resource *data; - union { + struct { + unsigned high_temp; struct { - unsigned high_temp; unsigned attr[2]; } vp; } cfg; -- cgit v1.2.3 From e55964099b0d47dea80920765daac09b9e2a61a7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 3 Jun 2008 12:38:12 +1000 Subject: nv50: remove hardcoded fp stuff --- src/gallium/drivers/nv50/nv50_program.c | 45 ++++++--------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9d89f6d060..df29069e62 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,7 +11,6 @@ #include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 -#define TX_FRAGPROG 1 struct nv50_reg { enum { @@ -594,44 +593,16 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { -#if TX_FRAGPROG == 0 - struct tgsi_parse_context pc; - - tgsi_parse_init(&pc, p->pipe.tokens); - - if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - p->insns_nr = 8; - p->insns = malloc(p->insns_nr * sizeof(unsigned)); - p->insns[0] = 0x80000000; - p->insns[1] = 0x9000000c; - p->insns[2] = 0x82010600; - p->insns[3] = 0x82020604; - p->insns[4] = 0x80030609; - p->insns[5] = 0x00020780; - p->insns[6] = 0x8004060d; - p->insns[7] = 0x00020781; - } else - if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { -#endif - int i; - - if (nv50_program_tx(p) == FALSE) - assert(0); - /* *not* sufficient, it's fine if last inst is long and - * NOT immd - otherwise it's fucked fucked fucked */ - p->insns[p->insns_nr - 1] |= 0x00000001; + int i; - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); -#if TX_FRAGPROG == 0 - } else { - NOUVEAU_ERR("invalid TGSI processor\n"); - tgsi_parse_free(&pc); - return; - } + if (nv50_program_tx(p) == FALSE) + assert(0); + /* *not* sufficient, it's fine if last inst is long and + * NOT immd - otherwise it's fucked fucked fucked */ + p->insns[p->insns_nr - 1] |= 0x00000001; - tgsi_parse_free(&pc); -#endif + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); p->translated = TRUE; } -- cgit v1.2.3 From 2a1fb44d75364f2492a1ae5d232218a92b8ca807 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 4 Jun 2008 21:23:14 +1000 Subject: nv50: checkpoint: shader code now exceeds caps of "old" code --- src/gallium/drivers/nv50/nv50_program.c | 279 +++++++++++++++++++++++++++++++- 1 file changed, 272 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index df29069e62..bc0b7b2827 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -44,6 +44,9 @@ struct nv50_pc { struct nv50_reg *immd; float *immd_buf; int immd_nr; + + struct nv50_reg *temp_temp[4]; + unsigned temp_temp_nr; }; static void @@ -108,6 +111,26 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } +static struct nv50_reg * +temp_temp(struct nv50_pc *pc) +{ + if (pc->temp_temp_nr >= 4) + assert(0); + + pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + return pc->temp_temp[pc->temp_temp_nr++]; +} + +static void +kill_temp_temp(struct nv50_pc *pc) +{ + int i; + + for (i = 0; i < pc->temp_temp_nr; i++) + free_temp(pc, pc->temp_temp[i]); + pc->temp_temp_nr = 0; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -168,6 +191,8 @@ emit(struct nv50_pc *pc, unsigned *inst) p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); } + + kill_temp_temp(pc); } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -261,11 +286,24 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, inst); } +static void +set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + set_long(pc, inst); + if (src->type == P_IMMD) { + inst[1] |= (NV50_CB_PMISC << 22); + } else { + if (pc->p->type == NV50_PROG_VERTEX) + inst[1] |= (NV50_CB_PVP << 22); + else + inst[1] |= (NV50_CB_PFP << 22); + } +} + static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { unsigned inst[2] = { 0, 0 }; - int i; inst[0] |= 0x10000000; @@ -280,10 +318,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } else if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, inst); - if (src->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); + set_cseg(pc, src, inst); inst[0] |= (src->hw << 9); inst[1] |= 0x20000000; /* src0 const? */ } else { @@ -308,11 +343,177 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static boolean +check_swap_src_0_1(struct nv50_pc *pc, + struct nv50_reg **s0, struct nv50_reg **s1) +{ + struct nv50_reg *src0 = *s0, *src1 = *s1; + + if (src0->type == P_CONST) { + if (src1->type != P_CONST) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } else + if (src1->type == P_ATTR) { + if (src0->type != P_ATTR) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } + + return FALSE; +} + +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + if (src->type == P_ATTR) { + set_long(pc, inst); + inst[1] |= 0x00200000; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 9); +} + +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + set_cseg(pc, src, inst); + inst[0] |= 0x00800000; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 16); +} + +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + set_long(pc, inst); + + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + set_cseg(pc, src, inst); + inst[0] |= 0x01000000; + } + + alloc_reg(pc, src); + inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xc0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_2(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_long(pc, inst); + if (check_swap_src_0_1(pc, &src0, &src1)) + inst[1] |= 0x04000000; + else + inst[1] |= 0x08000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_2(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xe0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + set_src_2(pc, src2, inst); + + emit(pc, inst); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, + struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0x90000000; + inst[1] |= (sub << 29); + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4]; + struct nv50_reg *dst[4], *src[3][4], *temp; unsigned mask; int i, c; @@ -333,10 +534,69 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) + emit_add(pc, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_COS: + for (c = 0; c < 4; c++) + emit_flop(pc, 5, dst[c], src[0][c]); + break; + case TGSI_OPCODE_DP3: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], temp); + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP4: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_mad(pc, temp, src[0][3], src[1][3], temp); + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], temp); + free_temp(pc, temp); + break; + case TGSI_OPCODE_EX2: + for (c = 0; c < 4; c++) + emit_flop(pc, 6, dst[c], src[0][c]); + break; + case TGSI_OPCODE_LG2: + for (c = 0; c < 4; c++) + emit_flop(pc, 3, dst[c], src[0][c]); + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) + emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) emit_mov(pc, dst[c], src[0][c]); break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) + emit_mul(pc, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) + emit_flop(pc, 0, dst[c], src[0][c]); + break; + case TGSI_OPCODE_RSQ: + for (c = 0; c < 4; c++) + emit_flop(pc, 2, dst[c], src[0][c]); + break; + case TGSI_OPCODE_SIN: + for (c = 0; c < 4; c++) + emit_flop(pc, 4, dst[c], src[0][c]); + break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) + emit_sub(pc, dst[c], src[0][c], src[1][c]); + break; case TGSI_OPCODE_END: break; default: @@ -601,8 +861,13 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; + if (p->type == NV50_PROG_VERTEX) { + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); + } else { for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); + NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); + } p->translated = TRUE; } -- cgit v1.2.3 From 52a69196c1680ff16d1ad1fc88e5869bc6055d00 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 4 Jun 2008 21:45:32 +1000 Subject: nv50: some fixes + MIN/MAX --- src/gallium/drivers/nv50/nv50_program.c | 48 ++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc0b7b2827..afc911cd26 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -45,7 +45,7 @@ struct nv50_pc { float *immd_buf; int immd_nr; - struct nv50_reg *temp_temp[4]; + struct nv50_reg *temp_temp[8]; unsigned temp_temp_nr; }; @@ -114,7 +114,7 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) static struct nv50_reg * temp_temp(struct nv50_pc *pc) { - if (pc->temp_temp_nr >= 4) + if (pc->temp_temp_nr >= 8) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); @@ -191,8 +191,6 @@ emit(struct nv50_pc *pc, unsigned *inst) p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); } - - kill_temp_temp(pc); } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -440,8 +438,8 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void -emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, - struct nv50_reg *src1) +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) { unsigned inst[2] = { 0, 0 }; @@ -450,7 +448,28 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, inst); set_src_0(pc, src0, inst); - set_src_2(pc, src1, inst); + if (is_long(inst)) + set_src_2(pc, src1, inst); + else + set_src_1(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0xb0000000; + inst[1] |= (sub << 29); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); emit(pc, inst); } @@ -499,9 +518,11 @@ emit_flop(struct nv50_pc *pc, unsigned sub, { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] |= 0x90000000; - inst[1] |= (sub << 29); + if (sub) { + set_long(pc, inst); + inst[1] |= (sub << 29); + } set_dst(pc, dst, inst); set_src_0(pc, src, inst); @@ -573,6 +594,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); break; + case TGSI_OPCODE_MAX: + for (c = 0; c < 4; c++) + emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_MIN: + for (c = 0; c < 4; c++) + emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) emit_mov(pc, dst[c], src[0][c]); @@ -604,6 +633,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + kill_temp_temp(pc); return TRUE; } -- cgit v1.2.3 From 7df7f7bb99441ed8e2fba2840e0459e72691f272 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Jun 2008 18:39:00 +1000 Subject: nv50: big fuckup, fix it --- src/gallium/drivers/nv50/nv50_program.c | 75 ++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index afc911cd26..a293db6f72 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -556,20 +556,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ADD: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_add(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_COS: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 5, dst[c], src[0][c]); + } break; case TGSI_OPCODE_DP3: temp = alloc_temp(pc, NULL); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_DP4: @@ -578,53 +587,89 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_EX2: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 6, dst[c], src[0][c]); + } break; case TGSI_OPCODE_LG2: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 3, dst[c], src[0][c]); + } break; case TGSI_OPCODE_MAD: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } break; case TGSI_OPCODE_MAX: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_MIN: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_MOV: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], src[0][c]); + } break; case TGSI_OPCODE_MUL: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mul(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_RCP: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 0, dst[c], src[0][c]); + } break; case TGSI_OPCODE_RSQ: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 2, dst[c], src[0][c]); + } break; case TGSI_OPCODE_SIN: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 4, dst[c], src[0][c]); + } break; case TGSI_OPCODE_SUB: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_sub(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_END: break; -- cgit v1.2.3 From fbf4027dd9b279ec159906dcad134f71e34aaec8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Jun 2008 18:52:16 +1000 Subject: nv50: fix EX2.. somehow --- src/gallium/drivers/nv50/nv50_program.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a293db6f72..6d3d8ff302 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -595,11 +595,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_EX2: + temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 6, dst[c], src[0][c]); + { + unsigned inst[2] = { 0, 0 }; + inst[0] |= 0xb0000000; + set_dst(pc, temp, inst); + set_src_0(pc, src[0][c], inst); + set_long(pc, inst); + inst[1] |= (6 << 29) | 0x00004000; + emit(pc, inst); + } + emit_flop(pc, 6, dst[c], temp); } + free_temp(pc, temp); break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { -- cgit v1.2.3 From 21e18e2b74d71c93af06ef4c603ca371c4614237 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Jun 2008 23:48:23 +1000 Subject: nv50: DPH, XPD, some TODOs --- src/gallium/drivers/nv50/nv50_program.c | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6d3d8ff302..91f24369f1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,6 +12,25 @@ #define NV50_SU_MAX_TEMP 64 +/* ABS + * ARL + * DST - const(1.0) + * FLR + * FRC + * LIT + * POW + * SGE - cvt + * SLT - cvt + * SWZ + * + * MSB - Like MAD, but MUL+SUB + * - Fuck it off, introduce a way to negate args for ops that + * support it. + * + * Need ability to specifiy driver IMMD values, like nv40 constant() + * + * Look into inlining IMMD for ops other than MOV + */ struct nv50_reg { enum { P_TEMP, @@ -512,6 +531,25 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, emit(pc, inst); } +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xe0000000; + set_long(pc, inst); + inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + set_src_2(pc, src2, inst); + + emit(pc, inst); +} + static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -594,6 +632,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_DPH: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_add(pc, temp, src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { @@ -682,6 +733,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_XPD: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + free_temp(pc, temp); + break; case TGSI_OPCODE_END: break; default: -- cgit v1.2.3 From d69f33423087fc054181c60724f4bcbe29195e08 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 6 Jun 2008 13:57:59 +1000 Subject: nv50: small cleanup --- src/gallium/drivers/nv50/nv50_program.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 91f24369f1..7295c2314c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -568,6 +568,21 @@ emit_flop(struct nv50_pc *pc, unsigned sub, emit(pc, inst); } +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + set_long(pc, inst); + inst[1] |= (6 << 29) | 0x00004000; + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -650,15 +665,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - { - unsigned inst[2] = { 0, 0 }; - inst[0] |= 0xb0000000; - set_dst(pc, temp, inst); - set_src_0(pc, src[0][c], inst); - set_long(pc, inst); - inst[1] |= (6 << 29) | 0x00004000; - emit(pc, inst); - } + emit_preex2(pc, temp, src[0][c]); emit_flop(pc, 6, dst[c], temp); } free_temp(pc, temp); @@ -1081,11 +1088,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) void nv50_fragprog_validate(struct nv50_context *nv50) { - struct pipe_winsys *ws = nv50->pipe.winsys; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - void *map; if (!p->translated) { nv50_program_validate(nv50, p); -- cgit v1.2.3 From bdd31c20abb27665ca701a5a46e29d4cfa71f679 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 11:41:47 +1000 Subject: nv50: SGE/SLT --- src/gallium/drivers/nv50/nv50_program.c | 82 ++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7295c2314c..fa16c7522e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -19,8 +19,6 @@ * FRC * LIT * POW - * SGE - cvt - * SLT - cvt * SWZ * * MSB - Like MAD, but MUL+SUB @@ -201,15 +199,15 @@ emit(struct nv50_pc *pc, unsigned *inst) { struct nv50_program *p = pc->p; - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -582,6 +580,54 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +/*XXX: inaccurate results.. why? */ +#define ALLOW_SET_SWAP 0 + +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; +#if ALLOW_SET_SWAP + unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; +#endif + struct nv50_reg *rdst; + +#if ALLOW_SET_SWAP + assert(c_op <= 7); + if (check_swap_src_0_1(pc, &src0, &src1)) + c_op = inv_cop[c_op]; +#endif + + rdst = dst; + if (dst->type != P_TEMP) + dst = alloc_temp(pc, NULL); + + /* set.u32 */ + set_long(pc, inst); + inst[0] |= 0xb0000000; + inst[1] |= (3 << 29); + inst[1] |= (c_op << 14); + /*XXX: breaks things, .u32 by default? + * decuda will disasm as .u16 and use .lo/.hi regs, but this + * doesn't seem to match what the hw actually does. + inst[1] |= 0x04000000; << breaks things.. .u32 by default? + */ + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + emit(pc, inst); + + /* cvt.f32.u32 */ + inst[0] = 0xa0000001; + inst[1] = 0x64014780; + set_dst(pc, rdst, inst); + set_src_0(pc, dst, inst); + emit(pc, inst); + + if (dst != rdst) + free_temp(pc, dst); +} static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) @@ -726,6 +772,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 2, dst[c], src[0][c]); } break; + case TGSI_OPCODE_SGE: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 6, dst[c], src[0][c], src[1][c]); + } + break; case TGSI_OPCODE_SIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -733,6 +786,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 4, dst[c], src[0][c]); } break; + case TGSI_OPCODE_SLT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + } + break; case TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From b4f7463585071236d633e4c857dbbdf67b03dc94 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 12:55:06 +1000 Subject: nv50: FLR/FRC --- src/gallium/drivers/nv50/nv50_program.c | 37 +++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fa16c7522e..448062e767 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,8 +15,6 @@ /* ABS * ARL * DST - const(1.0) - * FLR - * FRC * LIT * POW * SWZ @@ -629,6 +627,24 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, free_temp(pc, dst); } +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] = 0xa0000000; /* cvt */ + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x08000000; /* integer mode */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -716,6 +732,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_FLR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_FRC: + temp = alloc_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, temp, src[0][c]); + emit_sub(pc, dst[c], src[0][c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 68091b0c89310c309b668c9d6d80640dc6040ab7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 13:01:58 +1000 Subject: nv50: ABS --- src/gallium/drivers/nv50/nv50_program.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 448062e767..7c2177d42d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,8 +12,7 @@ #define NV50_SU_MAX_TEMP 64 -/* ABS - * ARL +/* ARL * DST - const(1.0) * LIT * POW @@ -670,6 +669,21 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + for (c = 0; c < 4; c++) { + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] = 0xa0000000; /* cvt */ + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst[c], inst); + set_src_0(pc, src[0][c], inst); + emit(pc, inst); + } + break; case TGSI_OPCODE_ADD: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 33e4d30d50344be26398a51365bea1be37487403 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 14:10:48 +1000 Subject: nv50: DST --- src/gallium/drivers/nv50/nv50_program.c | 61 ++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7c2177d42d..49a731842f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -13,18 +13,18 @@ #define NV50_SU_MAX_TEMP 64 /* ARL - * DST - const(1.0) - * LIT + * LIT - other buggery * POW - * SWZ + * SWZ - negation ARGH + * SAT * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that * support it. * - * Need ability to specifiy driver IMMD values, like nv40 constant() - * * Look into inlining IMMD for ops other than MOV + * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * but can emit to P_TEMP first - then MOV later. NVIDIA does this */ struct nv50_reg { enum { @@ -145,6 +145,32 @@ kill_temp_temp(struct nv50_pc *pc) pc->temp_temp_nr = 0; } +static int +ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +{ + pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * + sizeof(float)); + pc->immd_buf[(pc->immd_nr * 4) + 0] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = x; + pc->immd_buf[(pc->immd_nr * 4) + 2] = x; + pc->immd_buf[(pc->immd_nr * 4) + 3] = x; + + return pc->immd_nr++; +} + +static struct nv50_reg * +alloc_immd(struct nv50_pc *pc, float f) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + unsigned hw; + + hw = ctor_immd(pc, f, 0, 0, 0); + r->type = P_IMMD; + r->hw = hw; + r->index = -1; + return r; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -736,6 +762,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_DST: + { + struct nv50_reg *one = alloc_immd(pc, 1.0); + emit_mov(pc, dst[0], one); + emit_mul(pc, dst[1], src[0][1], src[1][1]); + emit_mov(pc, dst[2], src[0][2]); + emit_mov(pc, dst[3], src[1][3]); + FREE(one); + } + break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { @@ -886,17 +922,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) const struct tgsi_full_immediate *imm = &p.FullToken.FullImmediate; - pc->immd_nr++; - pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * - sizeof(float)); - pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = - imm->u.ImmediateFloat32[0].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = - imm->u.ImmediateFloat32[1].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = - imm->u.ImmediateFloat32[2].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = - imm->u.ImmediateFloat32[3].Float; + ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); } break; case TGSI_TOKEN_TYPE_DECLARATION: -- cgit v1.2.3 From 34abb858e2aaef2c1a066a7cdb3e0376d6c9f6bd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 15:21:05 +1000 Subject: nv50: handle 0/1 SWZ --- src/gallium/drivers/nv50/nv50_program.c | 56 +++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 49a731842f..cad1d9d679 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,7 +15,6 @@ /* ARL * LIT - other buggery * POW - * SWZ - negation ARGH * SAT * * MSB - Like MAD, but MUL+SUB @@ -189,32 +188,55 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) } static struct nv50_reg * -tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) { - /* Handle swizzling */ + struct nv50_reg *r = NULL; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); switch (c) { - case 0: c = src->SrcRegister.SwizzleX; break; - case 1: c = src->SrcRegister.SwizzleY; break; - case 2: c = src->SrcRegister.SwizzleZ; break; - case 3: c = src->SrcRegister.SwizzleW; break; + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; default: assert(0); + break; } - switch (src->SrcRegister.File) { - case TGSI_FILE_INPUT: - return &pc->attr[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_TEMPORARY: - return &pc->temp[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_CONSTANT: - return &pc->param[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_IMMEDIATE: - return &pc->immd[src->SrcRegister.Index * 4 + c]; + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; default: + assert(0); break; } - return NULL; + return r; } static void -- cgit v1.2.3 From 688064236ba8b5997014493eb6c6e3fe0739813e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 16:01:06 +1000 Subject: nv50: fixes + untested _SAT modifier --- src/gallium/drivers/nv50/nv50_program.c | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cad1d9d679..9fcd7c36c6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,7 +15,6 @@ /* ARL * LIT - other buggery * POW - * SAT * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -679,8 +678,8 @@ emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); inst[1] |= (6 << 29); /* cvt */ inst[1] |= 0x08000000; /* integer mode */ inst[1] |= 0x04000000; /* 32 bit */ @@ -696,13 +695,14 @@ static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4], *temp; - unsigned mask; + struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + unsigned mask, sat; int i, c; NOUVEAU_ERR("insn %p\n", tok); mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; for (c = 0; c < 4; c++) { if (mask & (1 << c)) @@ -716,13 +716,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); } + if (sat) { + for (c = 0; c < 4; c++) { + rdst[c] = dst[c]; + dst[c] = temp_temp(pc); + } + } + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); inst[1] |= (6 << 29); /* cvt */ inst[1] |= 0x04000000; /* 32 bit */ inst[1] |= (1 << 14); /* src .f32 */ @@ -922,6 +929,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + if (sat) { + for (c = 0; c < 4; c++) { + unsigned inst[2] = { 0, 0 }; + + if (!(mask & (1 << c))) + continue; + + inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], inst); + set_src_0(pc, dst[c], inst); + emit(pc, inst); + } + } + kill_temp_temp(pc); return TRUE; } -- cgit v1.2.3 From ea4b09cbcbd9db82648ab30f18c0f46a66ab9f69 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 16:08:36 +1000 Subject: nv50: POW! --- src/gallium/drivers/nv50/nv50_program.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9fcd7c36c6..79893a9ad4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -14,15 +14,17 @@ /* ARL * LIT - other buggery - * POW * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that * support it. * - * Look into inlining IMMD for ops other than MOV + * Look into inlining IMMD for ops other than MOV (make it general?) * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. */ struct nv50_reg { enum { @@ -489,6 +491,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, unsigned inst[2] = { 0, 0 }; inst[0] |= 0xc0000000; + set_long(pc, inst); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, inst); @@ -870,6 +873,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_POW: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); + emit_mul(pc, temp, temp, src[1][0]); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_RCP: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 686bc00c05094e8678747c111a6a70ad4b7063e3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 16:51:07 +1000 Subject: nv50: oops, copy+pasto --- src/gallium/drivers/nv50/nv50_program.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 79893a9ad4..b013435f99 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -151,9 +151,9 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * sizeof(float)); pc->immd_buf[(pc->immd_nr * 4) + 0] = x; - pc->immd_buf[(pc->immd_nr * 4) + 1] = x; - pc->immd_buf[(pc->immd_nr * 4) + 2] = x; - pc->immd_buf[(pc->immd_nr * 4) + 3] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = y; + pc->immd_buf[(pc->immd_nr * 4) + 2] = z; + pc->immd_buf[(pc->immd_nr * 4) + 3] = w; return pc->immd_nr++; } -- cgit v1.2.3 From faa1c02546db00f69c66db18076b5b0ac86d7138 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 17:36:10 +1000 Subject: nv50: create emit_pow() - emit_lit() will need to use it --- src/gallium/drivers/nv50/nv50_program.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b013435f99..bc0a834aee 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -627,6 +627,7 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } + /*XXX: inaccurate results.. why? */ #define ALLOW_SET_SWAP 0 @@ -694,6 +695,20 @@ emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *v, struct nv50_reg *e) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + + emit_flop(pc, 3, temp, v); + emit_mul(pc, temp, temp, e); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, dst, temp); + + free_temp(pc, temp); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -875,10 +890,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_POW: temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, src[0][0]); - emit_mul(pc, temp, temp, src[1][0]); - emit_preex2(pc, temp, temp); - emit_flop(pc, 6, temp, temp); + emit_pow(pc, temp, src[0][0], src[1][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; -- cgit v1.2.3 From fe90cc509f75772ce202930c934bade1d4b116c8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 17:42:00 +1000 Subject: nv50: obey per-source abs (TGSI_UTIL_SIGN_CLEAR) --- src/gallium/drivers/nv50/nv50_program.c | 175 +++++++++++++++++--------------- 1 file changed, 95 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc0a834aee..fabd016491 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -171,75 +171,6 @@ alloc_immd(struct nv50_pc *pc, float f) return r; } -static struct nv50_reg * -tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) -{ - switch (dst->DstRegister.File) { - case TGSI_FILE_TEMPORARY: - return &pc->temp[dst->DstRegister.Index * 4 + c]; - case TGSI_FILE_OUTPUT: - return &pc->result[dst->DstRegister.Index * 4 + c]; - case TGSI_FILE_NULL: - return NULL; - default: - break; - } - - return NULL; -} - -static struct nv50_reg * -tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) -{ - struct nv50_reg *r = NULL; - unsigned c; - - c = tgsi_util_get_full_src_register_extswizzle(src, chan); - switch (c) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (src->SrcRegister.File) { - case TGSI_FILE_INPUT: - r = &pc->attr[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_TEMPORARY: - r = &pc->temp[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_CONSTANT: - r = &pc->param[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_IMMEDIATE: - r = &pc->immd[src->SrcRegister.Index * 4 + c]; - break; - default: - assert(0); - break; - } - break; - case TGSI_EXTSWIZZLE_ZERO: - r = alloc_immd(pc, 0.0); - break; - case TGSI_EXTSWIZZLE_ONE: - r = alloc_immd(pc, 1.0); - break; - default: - assert(0); - break; - } - - switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { - case TGSI_UTIL_SIGN_KEEP: - break; - default: - assert(0); - break; - } - - return r; -} - static void emit(struct nv50_pc *pc, unsigned *inst) { @@ -709,6 +640,98 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, free_temp(pc, temp); } +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ + struct nv50_reg *r = NULL; + struct nv50_reg *temp; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); + switch (c) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; + default: + assert(0); + break; + } + + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + r = temp; + break; + default: + assert(0); + break; + } + + return r; +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -744,17 +767,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst[c], inst); - set_src_0(pc, src[0][c], inst); - emit(pc, inst); + if (!(mask & (1 << c))) + continue; + emit_abs(pc, dst[c], src[0][c]); } break; case TGSI_OPCODE_ADD: -- cgit v1.2.3 From 01e36eb531dfb4b1b3fd38d3fc00c6770833b5ea Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jun 2008 19:54:04 +1000 Subject: nv50: LIT - sort-of *somehow* we have the exact same bug here as on nv4x, the difference being on nv4x the hw actually has a LIT opcode.. NVIDIA doesn't have the bug on either arch FWIW. --- src/gallium/drivers/nv50/nv50_program.c | 36 +++++++++++++++++++++++++++++++-- src/gallium/drivers/nv50/nv50_vbo.c | 1 - 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fabd016491..186eea5e13 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -13,7 +13,6 @@ #define NV50_SU_MAX_TEMP 64 /* ARL - * LIT - other buggery * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -23,6 +22,8 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * + * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! + * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. */ @@ -164,7 +165,7 @@ alloc_immd(struct nv50_pc *pc, float f) struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); unsigned hw; - hw = ctor_immd(pc, f, 0, 0, 0); + hw = ctor_immd(pc, f, 0, 0, 0) * 4; r->type = P_IMMD; r->hw = hw; r->index = -1; @@ -657,6 +658,34 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0); + struct nv50_reg *zero = alloc_immd(pc, 0.0); + struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); + struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); + struct nv50_reg *tmp[4]; + + emit_mov(pc, dst[0], one); + emit_mov(pc, dst[3], one); + + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, dst[1], src[0], zero); + set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); + + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -861,6 +890,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_LIT: + emit_lit(pc, &dst[0], &src[0][0]); + break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f086c76258..badbef53df 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -35,7 +35,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, struct nv50_context *nv50 = nv50_context(pipe); nv50_state_validate(nv50); - NOUVEAU_ERR("unimplemented\n"); BEGIN_RING(tesla, 0x142c, 1); OUT_RING (0); -- cgit v1.2.3 From 9a37a56c8ab8c64bdadb1e1e807f885d6a5e3121 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 8 Jun 2008 11:23:06 +1000 Subject: nv50: obey writemask in a couple of places --- src/gallium/drivers/nv50/nv50_program.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 186eea5e13..7206e5d280 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,7 +12,7 @@ #define NV50_SU_MAX_TEMP 64 -/* ARL +/* ARL - gallium craps itself on progs/vp/arl.txt * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -856,10 +856,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_DST: { struct nv50_reg *one = alloc_immd(pc, 1.0); - emit_mov(pc, dst[0], one); - emit_mul(pc, dst[1], src[0][1], src[1][1]); - emit_mov(pc, dst[2], src[0][2]); - emit_mov(pc, dst[3], src[1][3]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + if (mask & (1 << 1)) + emit_mul(pc, dst[1], src[0][1], src[1][1]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], src[0][2]); + if (mask & (1 << 3)) + emit_mov(pc, dst[3], src[1][3]); FREE(one); } break; @@ -891,6 +895,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_LIT: + /*XXX: writemask */ emit_lit(pc, &dst[0], &src[0][0]); break; case TGSI_OPCODE_LG2: @@ -989,12 +994,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); - emit_mul(pc, temp, src[0][2], src[1][1]); - emit_msb(pc, dst[0], src[0][1], src[1][2], temp); - emit_mul(pc, temp, src[0][0], src[1][2]); - emit_msb(pc, dst[1], src[0][2], src[1][0], temp); - emit_mul(pc, temp, src[0][1], src[1][0]); - emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + if (mask & (1 << 0)) { + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + } + if (mask & (1 << 1)) { + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + } + if (mask & (1 << 2)) { + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_END: -- cgit v1.2.3 From afcaeaa0e4dc3ced40621c76304a2c0c5a3ab403 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 8 Jun 2008 14:12:01 +1000 Subject: nv50: note a critical bug --- src/gallium/drivers/nv50/nv50_program.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7206e5d280..b151b540bf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -26,6 +26,9 @@ * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * ie. SUB R0, R0.yzxw, R0 */ struct nv50_reg { enum { -- cgit v1.2.3 From 34a039ae7b158cacb5b20d91067e9d6458d30a56 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 8 Jun 2008 15:51:54 +1000 Subject: nv50: fix src1 & src2 == const --- src/gallium/drivers/nv50/nv50_program.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b151b540bf..936a8ef9b3 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -22,8 +22,6 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * - * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! - * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. * @@ -391,8 +389,16 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - set_cseg(pc, src, inst); - inst[0] |= 0x00800000; + assert(!(inst[0] & 0x00800000)); + if (inst[0] & 0x01000000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_cseg(pc, src, inst); + inst[0] |= 0x00800000; + } } alloc_reg(pc, src); @@ -411,8 +417,16 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - set_cseg(pc, src, inst); - inst[0] |= 0x01000000; + assert(!(inst[0] & 0x01000000)); + if (inst[0] & 0x00800000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_cseg(pc, src, inst); + inst[0] |= 0x01000000; + } } alloc_reg(pc, src); -- cgit v1.2.3 From 776e9581d16fc0fd28058fbcd879756fd5d40b96 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 13:07:38 +1000 Subject: nv50: delayed write of fragprog result regs until end of program --- src/gallium/drivers/nv50/nv50_program.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 936a8ef9b3..1733cf97b7 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -27,6 +27,8 @@ * * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 + * + * NV50_PROG* -> PIPE_SHADER* */ struct nv50_reg { enum { @@ -1198,11 +1200,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) + if (pc->p->type == NV50_PROG_FRAGMENT) { pc->result[i*4+c].type = P_TEMP; - else + pc->result[i*4+c].hw = -1; + } else { pc->result[i*4+c].type = P_RESULT; - pc->result[i*4+c].hw = rid++; + pc->result[i*4+c].hw = rid++; + } pc->result[i*4+c].index = i; } } @@ -1281,6 +1285,14 @@ nv50_program_tx(struct nv50_program *p) } } + if (p->type == NV50_PROG_FRAGMENT) { + struct nv50_reg out; + + out.type = P_TEMP; + for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) + emit_mov(pc, &out, &pc->result[out.hw]); + } + p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; -- cgit v1.2.3 From b5bbf09c42a9d563984fad875ced5c4814033a3d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 13:09:55 +1000 Subject: nv50: remove NV50_PROG_{VERTEX,FRAGMENT} --- src/gallium/drivers/nv50/nv50_program.c | 19 +++++++++++-------- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- src/gallium/drivers/nv50/nv50_state.h | 5 +---- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1733cf97b7..6b8ba1bdfe 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -28,7 +28,10 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * - * NV50_PROG* -> PIPE_SHADER* + * Things to check with renouveau: + * SGE/SLT with needed src0/1 swap + * FP attr/result assignment - how? + * FP/VP constbuf usage */ struct nv50_reg { enum { @@ -289,7 +292,7 @@ set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) if (src->type == P_IMMD) { inst[1] |= (NV50_CB_PMISC << 22); } else { - if (pc->p->type == NV50_PROG_VERTEX) + if (pc->p->type == PIPE_SHADER_VERTEX) inst[1] |= (NV50_CB_PVP << 22); else inst[1] |= (NV50_CB_PFP << 22); @@ -1144,7 +1147,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr) goto out_err; - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); aid++; } @@ -1153,7 +1156,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct nv50_reg *a = &pc->attr[i*4]; for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg *at = alloc_temp(pc, NULL); pc->attr[i*4+c].type = at->type; @@ -1168,7 +1171,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (pc->p->type != NV50_PROG_FRAGMENT) + if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; emit_interp(pc, iv, iv, iv, FALSE); @@ -1200,7 +1203,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { pc->result[i*4+c].type = P_TEMP; pc->result[i*4+c].hw = -1; } else { @@ -1285,7 +1288,7 @@ nv50_program_tx(struct nv50_program *p) } } - if (p->type == NV50_PROG_FRAGMENT) { + if (p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg out; out.type = P_TEMP; @@ -1314,7 +1317,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; - if (p->type == NV50_PROG_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX) { for (i = 0; i < p->insns_nr; i++) NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); } else { diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 48d09c296b..774117326a 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -331,7 +331,7 @@ nv50_vp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; - p->type = NV50_PROG_VERTEX; + p->type = PIPE_SHADER_VERTEX; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } @@ -361,7 +361,7 @@ nv50_fp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; - p->type = NV50_PROG_FRAGMENT; + p->type = PIPE_SHADER_FRAGMENT; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index dd352b6585..d568bf610d 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -15,10 +15,7 @@ struct nv50_program { struct tgsi_shader_info info; boolean translated; - enum { - NV50_PROG_VERTEX, - NV50_PROG_FRAGMENT - } type; + unsigned type; unsigned *insns; unsigned insns_nr; -- cgit v1.2.3 From 713ef6ccd2590bd866598bb6d4f646e9ec29ba78 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 13:31:42 +1000 Subject: nv50: use emit_flop() instead of building RCP manually on interp --- src/gallium/drivers/nv50/nv50_program.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6b8ba1bdfe..592435585d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1176,12 +1176,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) emit_interp(pc, iv, iv, iv, FALSE); tmp = alloc_temp(pc, NULL); - { - unsigned inst[2] = { 0, 0 }; - inst[0] = 0x90000000; - inst[0] |= (tmp->hw << 2); - emit(pc, inst); - } + emit_flop(pc, 0, tmp, iv); emit_interp(pc, &a[0], &a[0], tmp, TRUE); emit_interp(pc, &a[1], &a[1], tmp, TRUE); emit_interp(pc, &a[2], &a[2], tmp, TRUE); -- cgit v1.2.3 From 6d0f7ea95475009ee17862786469f7b9a34a797f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 16:26:51 +1000 Subject: nv50: note some things discovered during renouveau session --- src/gallium/drivers/nv50/nv50_program.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 592435585d..0274545131 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -28,10 +28,29 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * + * MOV dst, -src + * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) + * mov dst, tmp + * * Things to check with renouveau: - * SGE/SLT with needed src0/1 swap * FP attr/result assignment - how? - * FP/VP constbuf usage + * attrib + * - 0x16bc maps vp output onto fp hpos + * - 0x16c0 maps vp output onto fp col0 + * result + * - colr always 0-3 + * - depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * - 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * - XX == FP high something */ struct nv50_reg { enum { -- cgit v1.2.3 From 31f6a24b59b0ac18e04336d2e3cbaa643358c88a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 16:35:07 +1000 Subject: nv50: support the other TGSI_UTIL_SIGN modes --- src/gallium/drivers/nv50/nv50_program.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0274545131..44914f41b6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -727,6 +727,22 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); } +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0xa0000000; /* delta */ + inst[1] |= (7 << 29); /* delta */ + inst[1] |= 0x04000000; /* negate arg0? probably not */ + inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -794,6 +810,17 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) emit_abs(pc, temp, r); r = temp; break; + case TGSI_UTIL_SIGN_TOGGLE: + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_SET: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + emit_neg(pc, temp, r); + r = temp; + break; default: assert(0); break; -- cgit v1.2.3 From 51ea3aae03154046316b814053f7493bdb10c853 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 16:41:08 +1000 Subject: nv50: fix SGE/SLT when sources need swapping --- src/gallium/drivers/nv50/nv50_program.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 44914f41b6..21341619de 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -601,24 +601,17 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } -/*XXX: inaccurate results.. why? */ -#define ALLOW_SET_SWAP 0 - static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { unsigned inst[2] = { 0, 0 }; -#if ALLOW_SET_SWAP - unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; -#endif + unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; -#if ALLOW_SET_SWAP assert(c_op <= 7); if (check_swap_src_0_1(pc, &src0, &src1)) c_op = inv_cop[c_op]; -#endif rdst = dst; if (dst->type != P_TEMP) -- cgit v1.2.3 From 454394e749feca5ac00e7a270e6ca5529581d228 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 17:36:22 +1000 Subject: nv50: quick hack so progs/fp/fp-tri works for the moment --- src/gallium/drivers/nv50/nv50_context.h | 4 ++-- src/gallium/drivers/nv50/nv50_program.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c4a8a4c064..69c9dba675 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -24,8 +24,8 @@ fprintf(stderr, "nouveau: "fmt, ##args); /* Constant buffer assignment */ -#define NV50_CB_PMISC 0 -#define NV50_CB_PVP 1 +#define NV50_CB_PMISC 1 +#define NV50_CB_PVP 0 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 #define NV50_CB_TIC 4 diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 21341619de..843e036251 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1369,7 +1369,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); + OUT_RING ((NV50_CB_PMISC << 0) | (i << 8)); OUT_RING (fui(p->immd[i])); } } -- cgit v1.2.3 From 21e688e0a3faeef18b07c4d860bd71cc6e3ddf4a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 18:01:03 +1000 Subject: nv50: LRP --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 843e036251..33822a1a78 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -85,7 +85,7 @@ struct nv50_pc { float *immd_buf; int immd_nr; - struct nv50_reg *temp_temp[8]; + struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; }; @@ -154,7 +154,7 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) static struct nv50_reg * temp_temp(struct nv50_pc *pc) { - if (pc->temp_temp_nr >= 8) + if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); @@ -966,6 +966,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 3, dst[c], src[0][c]); } break; + case TGSI_OPCODE_LRP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + /*XXX: we can do better than this */ + temp = alloc_temp(pc, NULL); + emit_neg(pc, temp, src[0][c]); + emit_mad(pc, temp, temp, src[2][c], src[2][c]); + emit_mad(pc, dst[c], src[0][c], src[1][c], temp); + free_temp(pc, temp); + } + break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 7b7df34781844c39998d60bbb60880d960da3fb1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jun 2008 19:50:17 +1000 Subject: nv50: various fixes + SCS --- src/gallium/drivers/nv50/nv50_program.c | 89 ++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 33822a1a78..d69eb4b86b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -601,6 +601,21 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + set_long(pc, inst); + inst[1] |= (6 << 29); + + emit(pc, inst); +} + static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -693,7 +708,8 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } static void -emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src) { struct nv50_reg *one = alloc_immd(pc, 1.0); struct nv50_reg *zero = alloc_immd(pc, 0.0); @@ -701,23 +717,34 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); struct nv50_reg *tmp[4]; - emit_mov(pc, dst[0], one); - emit_mov(pc, dst[3], one); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + if (mask & (3 << 1)) { + if (mask & (1 << 1)) + tmp[0] = dst[1]; + else + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, tmp[0], src[0], zero); + } - tmp[0] = temp_temp(pc); - emit_minmax(pc, 4, dst[1], src[0], zero); - set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + if (mask & (1 << 2)) { + set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); - emit_pow(pc, dst[2], tmp[1], tmp[3]); - emit_mov(pc, dst[2], zero); - set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + } } static void @@ -870,10 +897,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_COS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 5, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_DP3: @@ -930,11 +960,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); + emit_preex2(pc, temp, src[0][0]); + emit_flop(pc, 6, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_preex2(pc, temp, src[0][c]); - emit_flop(pc, 6, dst[c], temp); + emit_mov(pc, dst[c], temp); } free_temp(pc, temp); break; @@ -956,14 +987,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_LIT: - /*XXX: writemask */ - emit_lit(pc, &dst[0], &src[0][0]); + emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 3, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_LRP: @@ -1027,16 +1059,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 0, dst[c], src[0][c]); + emit_flop(pc, 0, dst[c], src[0][0]); } break; case TGSI_OPCODE_RSQ: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 2, dst[c], src[0][c]); + emit_flop(pc, 2, dst[c], src[0][0]); } break; + case TGSI_OPCODE_SCS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + if (mask & (1 << 0)) + emit_flop(pc, 5, dst[0], temp); + if (mask & (1 << 1)) + emit_flop(pc, 4, dst[1], temp); + break; case TGSI_OPCODE_SGE: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -1045,10 +1085,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_SIN: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 4, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_SLT: -- cgit v1.2.3 From 7e9f6e290da23d7963857241b541e00c1fcf20dc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 11 Jun 2008 13:00:20 +1000 Subject: nv50: move some magics --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- src/gallium/drivers/nv50/nv50_screen.c | 9 --------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d69eb4b86b..934b67fb71 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,14 +1492,26 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(7, 2); + so = so_new(64, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x198c, 1); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x01040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 2); + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x16ac, 2); + so_data (so, 0x00000008); /* p: 0x00000004 */ + so_data (so, 0x00000004); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 6c0810a9cf..2417bf8001 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,15 +225,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); - so_method(so, screen->tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, screen->tesla, 0x1988, 2); - so_data (so, 0x08040404); - so_data (so, 0x00000004); - so_method(so, screen->tesla, 0x16ac, 2); - so_data (so, 8); - so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From bce558b37cde4be5c70117f49a2570f2988e5849 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 11 Jun 2008 13:15:23 +1000 Subject: Revert "nv50: move some magics" This reverts commit 0a38de30429d3075fc6dfc9ff3729c5ca11f0c2f. --- src/gallium/drivers/nv50/nv50_program.c | 16 ++-------------- src/gallium/drivers/nv50/nv50_screen.c | 9 +++++++++ 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 934b67fb71..d69eb4b86b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,26 +1492,14 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x1904, 4); - so_data (so, 0x01040404); /* p: 0x01000404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, tesla, 0x1988, 2); - so_data (so, 0x08040404); /* p: 0x0f000401 */ + so_method(so, tesla, 0x198c, 1); so_data (so, p->cfg.high_temp); - so_method(so, tesla, 0x16ac, 2); - so_data (so, 0x00000008); /* p: 0x00000004 */ - so_data (so, 0x00000004); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2417bf8001..6c0810a9cf 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,6 +225,15 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); + so_method(so, screen->tesla, 0x16bc, 2); + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, screen->tesla, 0x1988, 2); + so_data (so, 0x08040404); + so_data (so, 0x00000004); + so_method(so, screen->tesla, 0x16ac, 2); + so_data (so, 8); + so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From 585ae74d87f3d04a4b5b7c068b865292afd1a16b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 11 Jun 2008 13:24:32 +1000 Subject: nv50: move magics take 2 --- src/gallium/drivers/nv50/nv50_program.c | 13 +++++++++++-- src/gallium/drivers/nv50/nv50_screen.c | 9 --------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d69eb4b86b..b73003123d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,13 +1492,22 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(7, 2); + so = so_new(64, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x198c, 1); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x01040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */ + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 6c0810a9cf..2417bf8001 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,15 +225,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); - so_method(so, screen->tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, screen->tesla, 0x1988, 2); - so_data (so, 0x08040404); - so_data (so, 0x00000004); - so_method(so, screen->tesla, 0x16ac, 2); - so_data (so, 8); - so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From 19a1e9015e4ae429ab26e56848104fa209590338 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 11 Jun 2008 14:59:19 +1000 Subject: nv50: rename nv50_state.h to nv50_program.h --- src/gallium/drivers/nv50/nv50_context.h | 2 +- src/gallium/drivers/nv50/nv50_program.c | 1 - src/gallium/drivers/nv50/nv50_program.h | 37 +++++++++++++++++++++++++++++++++ src/gallium/drivers/nv50/nv50_state.c | 1 - src/gallium/drivers/nv50/nv50_state.h | 37 --------------------------------- src/gallium/drivers/nv50/nv50_vbo.c | 1 - 6 files changed, 38 insertions(+), 41 deletions(-) create mode 100644 src/gallium/drivers/nv50/nv50_program.h delete mode 100644 src/gallium/drivers/nv50/nv50_state.h (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 69c9dba675..72d859c468 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -15,8 +15,8 @@ struct nv50_screen *ctx = nv50->screen #include "nouveau/nouveau_push.h" -#include "nv50_state.h" #include "nv50_screen.h" +#include "nv50_program.h" #define NOUVEAU_ERR(fmt, args...) \ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b73003123d..2118d8ef85 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -8,7 +8,6 @@ #include "tgsi/util/tgsi_util.h" #include "nv50_context.h" -#include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 0000000000..dd5aed799a --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,37 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + +struct nv50_program_data { + int index; + int component; + float value; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned type; + unsigned *insns; + unsigned insns_nr; + + struct pipe_buffer *buffer; + + float *immd; + unsigned immd_nr; + + struct nouveau_resource *data; + + struct { + unsigned high_temp; + struct { + unsigned attr[2]; + } vp; + } cfg; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 774117326a..ed6cca9a0d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -3,7 +3,6 @@ #include "pipe/p_util.h" #include "nv50_context.h" -#include "nv50_state.h" #include "nouveau/nouveau_stateobj.h" diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h deleted file mode 100644 index d568bf610d..0000000000 --- a/src/gallium/drivers/nv50/nv50_state.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __NV50_STATE_H__ -#define __NV50_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - -struct nv50_program_data { - int index; - int component; - float value; -}; - -struct nv50_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - boolean translated; - - unsigned type; - unsigned *insns; - unsigned insns_nr; - - struct pipe_buffer *buffer; - - float *immd; - unsigned immd_nr; - - struct nouveau_resource *data; - - struct { - unsigned high_temp; - struct { - unsigned attr[2]; - } vp; - } cfg; -}; - -#endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index badbef53df..140d60cc9a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -3,7 +3,6 @@ #include "pipe/p_util.h" #include "nv50_context.h" -#include "nv50_state.h" static INLINE unsigned nv50_prim(unsigned mode) -- cgit v1.2.3 From 40137ea2631a0c8158f99ae30ca90ed038b72076 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 12:16:43 +1000 Subject: nv50: carry instructions around in nv50_program_exec, not a flat array --- src/gallium/drivers/nv50/nv50_program.c | 482 +++++++++++++++++--------------- src/gallium/drivers/nv50/nv50_program.h | 22 +- 2 files changed, 268 insertions(+), 236 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2118d8ef85..3df3848761 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -196,168 +196,175 @@ alloc_immd(struct nv50_pc *pc, float f) return r; } +static struct nv50_program_exec * +exec(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + + return e; +} + static void -emit(struct nv50_pc *pc, unsigned *inst) +emit(struct nv50_pc *pc, struct nv50_program_exec *e) { struct nv50_program *p = pc->p; - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } + if (p->exec_tail) + p->exec_tail->next = e; + if (!p->exec_head) + p->exec_head = e; + p->exec_tail = e; + p->exec_size += (e->inst[0] & 1) ? 2 : 1; } -static INLINE void set_long(struct nv50_pc *, unsigned *); +static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); static boolean -is_long(unsigned *inst) +is_long(struct nv50_program_exec *e) { - if (inst[0] & 1) + if (e->inst[0] & 1) return TRUE; return FALSE; } static boolean -is_immd(unsigned *inst) +is_immd(struct nv50_program_exec *e) { - if (is_long(inst) && (inst[1] & 3) == 3) + if (is_long(e) && (e->inst[1] & 3) == 3) return TRUE; return FALSE; } static INLINE void -set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, + struct nv50_program_exec *e) { - set_long(pc, inst); - inst[1] &= ~((0x1f << 7) | (0x3 << 12)); - inst[1] |= (pred << 7) | (idx << 12); + set_long(pc, e); + e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + e->inst[1] |= (pred << 7) | (idx << 12); } static INLINE void -set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, + struct nv50_program_exec *e) { - set_long(pc, inst); - inst[1] &= ~((0x3 << 4) | (1 << 6)); - inst[1] |= (idx << 4) | (on << 6); + set_long(pc, e); + e->inst[1] &= ~((0x3 << 4) | (1 << 6)); + e->inst[1] |= (idx << 4) | (on << 6); } static INLINE void -set_long(struct nv50_pc *pc, unsigned *inst) +set_long(struct nv50_pc *pc, struct nv50_program_exec *e) { - if (is_long(inst)) + if (is_long(e)) return; - inst[0] |= 1; - set_pred(pc, 0xf, 0, inst); - set_pred_wr(pc, 0, 0, inst); + e->inst[0] |= 1; + set_pred(pc, 0xf, 0, e); + set_pred_wr(pc, 0, 0, e); } static INLINE void -set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) { if (dst->type == P_RESULT) { - set_long(pc, inst); - inst[1] |= 0x00000008; + set_long(pc, e); + e->inst[1] |= 0x00000008; } alloc_reg(pc, dst); - inst[0] |= (dst->hw << 2); + e->inst[0] |= (dst->hw << 2); } static INLINE void -set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ - set_long(pc, inst); + set_long(pc, e); /*XXX: can't be predicated - bits overlap.. catch cases where both * are required and avoid them. */ - set_pred(pc, 0, 0, inst); - set_pred_wr(pc, 0, 0, inst); + set_pred(pc, 0, 0, e); + set_pred_wr(pc, 0, 0, e); - inst[1] |= 0x00000002 | 0x00000001; - inst[0] |= (val & 0x3f) << 16; - inst[1] |= (val >> 6) << 2; + e->inst[1] |= 0x00000002 | 0x00000001; + e->inst[0] |= (val & 0x3f) << 16; + e->inst[1] |= (val >> 6) << 2; } static void emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x80000000; - set_dst(pc, dst, inst); + e->inst[0] |= 0x80000000; + set_dst(pc, dst, e); alloc_reg(pc, iv); - inst[0] |= (iv->hw << 9); + e->inst[0] |= (iv->hw << 9); alloc_reg(pc, src); - inst[0] |= (src->hw << 16); + e->inst[0] |= (src->hw << 16); if (noperspective) - inst[0] |= (1 << 25); + e->inst[0] |= (1 << 25); - emit(pc, inst); + emit(pc, e); } static void -set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { - set_long(pc, inst); + set_long(pc, e); if (src->type == P_IMMD) { - inst[1] |= (NV50_CB_PMISC << 22); + e->inst[1] |= (NV50_CB_PMISC << 22); } else { if (pc->p->type == PIPE_SHADER_VERTEX) - inst[1] |= (NV50_CB_PVP << 22); + e->inst[1] |= (NV50_CB_PVP << 22); else - inst[1] |= (NV50_CB_PFP << 22); + e->inst[1] |= (NV50_CB_PFP << 22); } } static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x10000000; + e->inst[0] |= 0x10000000; - set_dst(pc, dst, inst); + set_dst(pc, dst, e); if (dst->type != P_RESULT && src->type == P_IMMD) { - set_immd(pc, src, inst); + set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs */ - inst[0] |= 0x00008000; + e->inst[0] |= 0x00008000; } else if (src->type == P_IMMD || src->type == P_CONST) { - set_long(pc, inst); - set_cseg(pc, src, inst); - inst[0] |= (src->hw << 9); - inst[1] |= 0x20000000; /* src0 const? */ + set_long(pc, e); + set_cseg(pc, src, e); + e->inst[0] |= (src->hw << 9); + e->inst[1] |= 0x20000000; /* src0 const? */ } else { if (src->type == P_ATTR) { - set_long(pc, inst); - inst[1] |= 0x00200000; + set_long(pc, e); + e->inst[1] |= 0x00200000; } alloc_reg(pc, src); - inst[0] |= (src->hw << 9); + e->inst[0] |= (src->hw << 9); } /* We really should support "half" instructions here at some point, * but I don't feel confident enough about them yet. */ - set_long(pc, inst); - if (is_long(inst) && !is_immd(inst)) { - inst[1] |= 0x04000000; /* 32-bit */ - inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + set_long(pc, e); + if (is_long(e) && !is_immd(e)) { + e->inst[1] |= 0x04000000; /* 32-bit */ + e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ } - emit(pc, inst); + emit(pc, e); } static boolean @@ -385,11 +392,11 @@ check_swap_src_0_1(struct nv50_pc *pc, } static void -set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - set_long(pc, inst); - inst[1] |= 0x00200000; + set_long(pc, e); + e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc); @@ -399,11 +406,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) } alloc_reg(pc, src); - inst[0] |= (src->hw << 9); + e->inst[0] |= (src->hw << 9); } static void -set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { struct nv50_reg *temp = temp_temp(pc); @@ -412,26 +419,26 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(inst[0] & 0x00800000)); - if (inst[0] & 0x01000000) { + assert(!(e->inst[0] & 0x00800000)); + if (e->inst[0] & 0x01000000) { struct nv50_reg *temp = temp_temp(pc); emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, inst); - inst[0] |= 0x00800000; + set_cseg(pc, src, e); + e->inst[0] |= 0x00800000; } } alloc_reg(pc, src); - inst[0] |= (src->hw << 16); + e->inst[0] |= (src->hw << 16); } static void -set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { - set_long(pc, inst); + set_long(pc, e); if (src->type == P_ATTR) { struct nv50_reg *temp = temp_temp(pc); @@ -440,186 +447,186 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(inst[0] & 0x01000000)); - if (inst[0] & 0x00800000) { + assert(!(e->inst[0] & 0x01000000)); + if (e->inst[0] & 0x00800000) { struct nv50_reg *temp = temp_temp(pc); emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, inst); - inst[0] |= 0x01000000; + set_cseg(pc, src, e); + e->inst[0] |= 0x01000000; } } alloc_reg(pc, src); - inst[1] |= (src->hw << 14); + e->inst[1] |= (src->hw << 14); } static void emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xc0000000; - set_long(pc, inst); + e->inst[0] |= 0xc0000000; + set_long(pc, e); check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - if (is_long(inst)) - set_src_2(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + if (is_long(e)) + set_src_2(pc, src1, e); else - set_src_1(pc, src1, inst); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - set_long(pc, inst); - inst[0] |= 0xb0000000; - inst[1] |= (sub << 29); + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (sub << 29); check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_long(pc, inst); + set_long(pc, e); if (check_swap_src_0_1(pc, &src0, &src1)) - inst[1] |= 0x04000000; + e->inst[1] |= 0x04000000; else - inst[1] |= 0x08000000; + e->inst[1] |= 0x08000000; - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_2(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_2(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xe0000000; + e->inst[0] |= 0xe0000000; check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - set_src_2(pc, src2, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); - emit(pc, inst); + emit(pc, e); } static void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xe0000000; - set_long(pc, inst); - inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + e->inst[0] |= 0xe0000000; + set_long(pc, e); + e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - set_src_2(pc, src2, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); - emit(pc, inst); + emit(pc, e); } static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x90000000; + e->inst[0] |= 0x90000000; if (sub) { - set_long(pc, inst); - inst[1] |= (sub << 29); + set_long(pc, e); + e->inst[1] |= (sub << 29); } - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); + set_dst(pc, dst, e); + set_src_0(pc, src, e); - emit(pc, inst); + emit(pc, e); } static void emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - set_long(pc, inst); - inst[1] |= (6 << 29) | 0x00004000; + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29) | 0x00004000; - emit(pc, inst); + emit(pc, e); } static void emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - set_long(pc, inst); - inst[1] |= (6 << 29); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29); - emit(pc, inst); + emit(pc, e); } static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; @@ -632,26 +639,27 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, dst = alloc_temp(pc, NULL); /* set.u32 */ - set_long(pc, inst); - inst[0] |= 0xb0000000; - inst[1] |= (3 << 29); - inst[1] |= (c_op << 14); + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (3 << 29); + e->inst[1] |= (c_op << 14); /*XXX: breaks things, .u32 by default? * decuda will disasm as .u16 and use .lo/.hi regs, but this * doesn't seem to match what the hw actually does. inst[1] |= 0x04000000; << breaks things.. .u32 by default? */ - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - emit(pc, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + emit(pc, e); /* cvt.f32.u32 */ - inst[0] = 0xa0000001; - inst[1] = 0x64014780; - set_dst(pc, rdst, inst); - set_src_0(pc, dst, inst); - emit(pc, inst); + e = exec(pc); + e->inst[0] = 0xa0000001; + e->inst[1] = 0x64014780; + set_dst(pc, rdst, e); + set_src_0(pc, dst, e); + emit(pc, e); if (dst != rdst) free_temp(pc, dst); @@ -660,19 +668,19 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, static void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x08000000; /* integer mode */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= ((0x1 << 3)) << 14; /* .rn */ - inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - - emit(pc, inst); + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x08000000; /* integer mode */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); } static void @@ -692,18 +700,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, static void emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - - emit(pc, inst); + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); } static void @@ -731,7 +739,7 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, } if (mask & (1 << 2)) { - set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + set_pred_wr(pc, 1, 0, pc->p->exec_tail); tmp[1] = temp_temp(pc); emit_minmax(pc, 4, tmp[1], src[1], zero); @@ -742,24 +750,24 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); - set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + set_pred(pc, 3, 0, pc->p->exec_tail); } } static void emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - set_long(pc, inst); - inst[0] |= 0xa0000000; /* delta */ - inst[1] |= (7 << 29); /* delta */ - inst[1] |= 0x04000000; /* negate arg0? probably not */ - inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); + set_long(pc, e); + e->inst[0] |= 0xa0000000; /* delta */ + e->inst[1] |= (7 << 29); /* delta */ + e->inst[1] |= 0x04000000; /* negate arg0? probably not */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); - emit(pc, inst); + emit(pc, e); } static struct nv50_reg * @@ -1132,20 +1140,21 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (sat) { for (c = 0; c < 4; c++) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e; if (!(mask & (1 << c))) continue; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 5) << 14); /* .sat */ - set_dst(pc, rdst[c], inst); - set_src_0(pc, dst[c], inst); - emit(pc, inst); + e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], e); + set_src_0(pc, dst[c], e); + emit(pc, e); } } @@ -1384,6 +1393,9 @@ nv50_program_tx(struct nv50_program *p) emit_mov(pc, &out, &pc->result[out.hw]); } + assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); + pc->p->exec_tail->inst[1] |= 0x00000001; + p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -1397,20 +1409,17 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { - int i; + struct nv50_program_exec *e; if (nv50_program_tx(p) == FALSE) assert(0); - /* *not* sufficient, it's fine if last inst is long and - * NOT immd - otherwise it's fucked fucked fucked */ - p->insns[p->insns_nr - 1] |= 0x00000001; - if (p->type == PIPE_SHADER_VERTEX) { - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); - } else { - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); + e = p->exec_head; + while (e) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + e = e->next; } p->translated = TRUE; @@ -1432,12 +1441,18 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; - void *map; + struct nv50_program_exec *e; + unsigned *map; if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); + for (e = p->exec_head; e; e = e->next) { + *(map++) = e->inst[0]; + if (is_long(e)) + *(map++) = e->inst[1]; + } ws->buffer_unmap(ws, p->buffer); } @@ -1519,11 +1534,14 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; - if (p->insns_nr) { - if (p->insns) - FREE(p->insns); - p->insns_nr = 0; + while (p->exec_head) { + struct nv50_program_exec *e = p->exec_head; + + p->exec_head = e->next; + FREE(e); } + p->exec_tail = NULL; + p->exec_size = 0; if (p->buffer) pipe_buffer_reference(ws, &p->buffer, NULL); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index dd5aed799a..6dafe56fbb 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -10,22 +10,36 @@ struct nv50_program_data { float value; }; +struct nv50_program_exec { + struct nv50_program_exec *next; + + unsigned inst[2]; + struct { + int index; + unsigned mask; + unsigned shift; + } param; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; boolean translated; unsigned type; - unsigned *insns; - unsigned insns_nr; + struct nv50_program_exec *exec_head; + struct nv50_program_exec *exec_tail; + unsigned exec_size; + struct nv50_program_data *data; + struct nouveau_resource *data_res; + unsigned data_nr; + unsigned data_start; struct pipe_buffer *buffer; float *immd; unsigned immd_nr; - struct nouveau_resource *data; - struct { unsigned high_temp; struct { -- cgit v1.2.3 From 1c7489bd7e5391136d0f2e68b467de89eb2d2bfc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 12:26:43 +1000 Subject: nv50: allow relocating a shader's constants at upload time --- src/gallium/drivers/nv50/nv50_program.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3df3848761..6c5aeb237d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -201,6 +201,7 @@ exec(struct nv50_pc *pc) { struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + e->param.index = -1; return e; } @@ -311,7 +312,8 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, } static void -set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, + struct nv50_program_exec *e) { set_long(pc, e); if (src->type == P_IMMD) { @@ -322,6 +324,10 @@ set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) else e->inst[1] |= (NV50_CB_PFP << 22); } + + e->param.index = src->hw; + e->param.shift = s; + e->param.mask = m << (s % 32); } static void @@ -342,8 +348,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } else if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, e); - set_cseg(pc, src, e); - e->inst[0] |= (src->hw << 9); + set_data(pc, src, 0x7f, 9, e); e->inst[1] |= 0x20000000; /* src0 const? */ } else { if (src->type == P_ATTR) { @@ -426,7 +431,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, e); + set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } } @@ -454,7 +459,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, e); + set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } } @@ -1449,6 +1454,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); for (e = p->exec_head; e; e = e->next) { + if (e->param.index >= 0) { + e->inst[e->param.shift / 32] &= ~e->param.mask; + e->inst[e->param.shift / 32] |= (e->param.index << + e->param.shift); + } + *(map++) = e->inst[0]; if (is_long(e)) *(map++) = e->inst[1]; -- cgit v1.2.3 From aea1669ff221f97682f0be6a60632e40c2739d09 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 12:39:35 +1000 Subject: nv50: use constbuf segment 0 for everything - I can't make the others work.. --- src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++++++++++------- src/gallium/drivers/nv50/nv50_program.h | 11 +----- 2 files changed, 57 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6c5aeb237d..cbbecafb02 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -10,6 +10,7 @@ #include "nv50_context.h" #define NV50_SU_MAX_TEMP 64 +#define NV50_PROGRAM_DUMP /* ARL - gallium craps itself on progs/vp/arl.txt * @@ -316,6 +317,9 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) { set_long(pc, e); +#if 1 + e->inst[1] |= (1 << 22); +#else if (src->type == P_IMMD) { e->inst[1] |= (NV50_CB_PMISC << 22); } else { @@ -324,6 +328,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, else e->inst[1] |= (NV50_CB_PFP << 22); } +#endif e->param.index = src->hw; e->param.shift = s; @@ -1335,7 +1340,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = 0; + int rid = pc->param_nr * 4; pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -1401,6 +1406,7 @@ nv50_program_tx(struct nv50_program *p) assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); pc->p->exec_tail->inst[1] |= 0x00000001; + p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -1418,26 +1424,47 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) if (nv50_program_tx(p) == FALSE) assert(0); - - e = p->exec_head; - while (e) { - NOUVEAU_ERR("0x%08x\n", e->inst[0]); - if (is_long(e)) - NOUVEAU_ERR("0x%08x\n", e->inst[1]); - e = e->next; - } - p->translated = TRUE; } static void nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct pipe_winsys *ws = nv50->pipe.winsys; + unsigned nr = p->param_nr + p->immd_nr; int i; + if (!p->data && nr) { + struct nouveau_resource *heap = nv50->screen->vp_data_heap; + + if (nvws->res_alloc(heap, nr, p, &p->data)) { + while (heap->next && heap->size < nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, nr, p, &p->data)) + assert(0); + } + } + + if (p->param_nr) { + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < p->param_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((NV50_CB_PMISC << 0) | + ((p->data->start + i) << 8)); + OUT_RING (fui(map[i])); + } + ws->buffer_unmap(ws, nv50->constbuf[p->type]); + } + for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | (i << 8)); + OUT_RING ((NV50_CB_PMISC << 0) | + ((p->data_start + p->param_nr + i) << 8)); OUT_RING (fui(p->immd[i])); } } @@ -1452,17 +1479,30 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!p->buffer) p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - for (e = p->exec_head; e; e = e->next) { - if (e->param.index >= 0) { + if (p->data && p->data->start != p->data_start) { + for (e = p->exec_head; e; e = e->next) { + if (e->param.index < 0) + continue; e->inst[e->param.shift / 32] &= ~e->param.mask; e->inst[e->param.shift / 32] |= (e->param.index << e->param.shift); } + p->data_start = p->data->start; + } + + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + for (e = p->exec_head; e; e = e->next) { +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("0x%08x\n", e->inst[0]); +#endif *(map++) = e->inst[0]; - if (is_long(e)) + if (is_long(e)) { +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("0x%08x\n", e->inst[1]); +#endif *(map++) = e->inst[1]; + } } ws->buffer_unmap(ws, p->buffer); } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 6dafe56fbb..d143ae9797 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -4,12 +4,6 @@ #include "pipe/p_state.h" #include "tgsi/util/tgsi_scan.h" -struct nv50_program_data { - int index; - int component; - float value; -}; - struct nv50_program_exec { struct nv50_program_exec *next; @@ -30,15 +24,14 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nv50_program_data *data; - struct nouveau_resource *data_res; - unsigned data_nr; + struct nouveau_resource *data; unsigned data_start; struct pipe_buffer *buffer; float *immd; unsigned immd_nr; + unsigned param_nr; struct { unsigned high_temp; -- cgit v1.2.3 From ab3d55e2e3578db8deba84dcf47a024071486bd8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 13:11:41 +1000 Subject: nv50: more efficient const upload + fixes (fp/* works now!) --- src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++++++++------------ src/gallium/drivers/nv50/nv50_screen.c | 4 +-- 2 files changed, 33 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cbbecafb02..4c41e5a7c2 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1420,20 +1420,35 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { - struct nv50_program_exec *e; - if (nv50_program_tx(p) == FALSE) assert(0); p->translated = TRUE; } +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, + unsigned start, unsigned count) +{ + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(tesla, 0x00000f00, 1); + OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (map, nr); + + map += nr; + start += nr; + count -= nr; + } +} + static void nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_winsys *nvws = nv50->screen->nvws; struct pipe_winsys *ws = nv50->pipe.winsys; unsigned nr = p->param_nr + p->immd_nr; - int i; if (!p->data && nr) { struct nouveau_resource *heap = nv50->screen->vp_data_heap; @@ -1452,20 +1467,15 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { float *map = ws->buffer_map(ws, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < p->param_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | - ((p->data->start + i) << 8)); - OUT_RING (fui(map[i])); - } + nv50_program_upload_data(nv50, map, p->data->start, + p->param_nr); ws->buffer_unmap(ws, nv50->constbuf[p->type]); } - for (i = 0; i < p->immd_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | - ((p->data_start + p->param_nr + i) << 8)); - OUT_RING (fui(p->immd[i])); + if (p->immd_nr) { + nv50_program_upload_data(nv50, p->immd, + p->data->start + p->param_nr, + p->immd_nr); } } @@ -1481,11 +1491,15 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (p->data && p->data->start != p->data_start) { for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci; + if (e->param.index < 0) continue; - e->inst[e->param.shift / 32] &= ~e->param.mask; - e->inst[e->param.shift / 32] |= (e->param.index << - e->param.shift); + ei = e->param.shift >> 5; + ci = e->param.index + p->data->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); } p->data_start = p->data->start; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2417bf8001..5f10fe2e44 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -229,8 +229,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 8); /* Shared constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { NOUVEAU_ERR("Error initialising constant buffer\n"); nv50_screen_destroy(&screen->pipe); return NULL; -- cgit v1.2.3 From f700d6be6335a4d4394296891f783687b6f2d4f2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 13:50:56 +1000 Subject: nv50: remove some cruft, don't upload program unless really needed --- src/gallium/drivers/nv50/nv50_program.c | 9 ++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 33 ++------------------------ 2 files changed, 10 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4c41e5a7c2..f71aa19312 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1484,10 +1484,13 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; struct nv50_program_exec *e; + boolean upload = FALSE; unsigned *map; - if (!p->buffer) + if (!p->buffer) { p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + upload = TRUE; + } if (p->data && p->data->start != p->data_start) { for (e = p->exec_head; e; e = e->next) { @@ -1503,8 +1506,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } p->data_start = p->data->start; + upload = TRUE; } + if (!upload) + return FALSE; + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); for (e = p->exec_head; e; e = e->next) { #ifdef NV50_PROGRAM_DUMP diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 05395c6df7..5f2244e3e8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -114,10 +114,10 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_ZSA) so_emit(nvws, nv50->zsa->so); - if (nv50->dirty & NV50_NEW_VERTPROG) + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) nv50_vertprog_validate(nv50); - if (nv50->dirty & NV50_NEW_FRAGPROG) + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) @@ -168,35 +168,6 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } - if (nv50->dirty & NV50_NEW_VERTPROG_CB) { - so = so_new(4, 2); - so_method(so, tesla, 0x1280, 3); - so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, - NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, - NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_data (so, (NV50_CB_PVP << 16) | 0x1000); - so_emit(nvws, so); - so_ref(NULL, &so); - } - - if (nv50->dirty & NV50_NEW_FRAGPROG_CB) { - so = so_new(4, 2); - so_method(so, tesla, 0x1280, 3); - so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, - NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, - NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_data (so, (NV50_CB_PFP << 16) | 0x1000); - so_emit(nvws, so); - so_ref(NULL, &so); - } - - if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From 619549a6377a58d54c9cf55f8863beed56b09566 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 14:21:28 +1000 Subject: nv50: valgrind complaint --- src/gallium/drivers/nv50/nv50_program.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index f71aa19312..af5ca84c8e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -146,8 +146,10 @@ static void free_temp(struct nv50_pc *pc, struct nv50_reg *r) { if (r->index == -1) { - FREE(pc->r_temp[r->hw]); - pc->r_temp[r->hw] = NULL; + unsigned hw = r->hw; + + FREE(pc->r_temp[hw]); + pc->r_temp[hw] = NULL; } } -- cgit v1.2.3 From 2fdeb4d5a5cc8b93bf885ba646e3a29a68c755ed Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 12 Jun 2008 23:49:26 +1000 Subject: nv50: comment on a so-far unseen bug --- src/gallium/drivers/nv50/nv50_program.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index af5ca84c8e..39597c5481 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -22,6 +22,9 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. * -- cgit v1.2.3 From c0ed6a871cd3513e17a1fab960f5626485ffed13 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 13 Jun 2008 12:09:46 +1000 Subject: nv50: do tsc/tic upload + stub out shader TEX stuff --- src/gallium/drivers/nv50/nv50_context.h | 17 +++++++++++++ src/gallium/drivers/nv50/nv50_miptree.c | 11 --------- src/gallium/drivers/nv50/nv50_program.c | 6 +++++ src/gallium/drivers/nv50/nv50_screen.c | 1 + src/gallium/drivers/nv50/nv50_state.c | 27 ++++++++++++++++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 33 ++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 19ec492eb3..5214ce1a69 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -44,6 +44,8 @@ #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) #define NV50_NEW_ARRAYS (1 << 12) +#define NV50_NEW_SAMPLER (1 << 13) +#define NV50_NEW_TEXTURE (1 << 14) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -60,6 +62,17 @@ struct nv50_rasterizer_stateobj { struct nouveau_stateobj *so; }; +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + struct nv50_context { struct pipe_context pipe; @@ -84,6 +97,10 @@ struct nv50_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; + unsigned *sampler[PIPE_MAX_SAMPLERS]; + unsigned sampler_nr; + struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; + unsigned miptree_nr; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 459e0ff9dd..cdd98844f9 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -5,17 +5,6 @@ #include "nv50_context.h" -struct nv50_miptree { - struct pipe_texture base; - struct pipe_buffer *buffer; -}; - -static INLINE struct nv50_miptree * -nv50_miptree(struct pipe_texture *pt) -{ - return (struct nv50_miptree *)pt; -} - static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 39597c5481..5eec68e5e1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -828,6 +828,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->SrcRegister.Index * 4 + c]; break; + case TGSI_FILE_SAMPLER: + break; default: assert(0); break; @@ -1130,6 +1132,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_TEX: + break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); if (mask & (1 << 0)) { @@ -1226,6 +1230,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->param_nr < (last + 1)) pc->param_nr = last + 1; break; + case TGSI_FILE_SAMPLER: + break; default: NOUVEAU_ERR("bad decl file %d\n", d->Declaration.File); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d76cce9f79..b9cecb77f1 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -28,6 +28,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, break; case PIPE_TEXTURE: switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_I8_UNORM: return TRUE; default: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 0129b0fb14..f36299db4d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "nv50_context.h" @@ -85,23 +86,47 @@ static void * nv50_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { - return NULL; + unsigned *tsc = CALLOC(8, sizeof(unsigned)); + + tsc[0] = 0x00024080; + tsc[1] = 0x00000062; + + return (void *)tsc; } static void nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + nv50->sampler_nr = nr; + for (i = 0; i < nv50->sampler_nr; i++) + nv50->sampler[i] = sampler[i]; + + nv50->dirty |= NV50_NEW_SAMPLER; } static void nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { + FREE(hwcso); } static void nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, struct pipe_texture **pt) { + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + for (i = 0; i < nr; i++) + pipe_texture_reference(&nv50->miptree[i], pt[i]); + for (i = nr; i < nv50->miptree_nr; i++) + pipe_texture_reference(&nv50->miptree[i], NULL); + + nv50->miptree_nr = nr; + nv50->dirty |= NV50_NEW_TEXTURE; } static void * diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index a3f399a139..63c1756bcb 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -178,6 +178,39 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_SAMPLER) { + int i; + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((NV50_CB_TSC << 0) | (0 << 8)); + BEGIN_RING(tesla, 0x40000f04, nv50->sampler_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) + OUT_RINGp(nv50->sampler[i], 8); + } + + if (nv50->dirty & NV50_NEW_TEXTURE) { + int i; + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((NV50_CB_TIC << 0) | (0 << 8)); + BEGIN_RING(tesla, 0x40000f04, nv50->miptree_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) { + struct nv50_miptree *mt = nv50->miptree[i]; + + OUT_RING (0x2a712488); + OUT_RELOCl(mt->buffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW); + OUT_RING (0xd0c05000); + OUT_RING (0x00300000); + OUT_RING (mt->base.width[0]); + OUT_RING ((mt->base.depth[0] << 16) | + mt->base.height[0]); + OUT_RING (0x03000000); + OUT_RELOCh(mt->buffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH); + } + } + if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From fa5cd63f96d2b69ded48d40b9cb7e57c147f7332 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 15 Jun 2008 15:49:25 +1000 Subject: nv50: simplify interp crap a bit... hopefully there wasn't a good reason I went the route I did.. can't recall.. --- src/gallium/drivers/nv50/nv50_program.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5eec68e5e1..5800a347bf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1263,7 +1263,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { - struct nv50_reg *iv = NULL, *tmp = NULL; + struct nv50_reg *iv = NULL; int aid = 0; pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); @@ -1272,6 +1272,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); + emit_interp(pc, iv, iv, iv, FALSE); + emit_flop(pc, 0, iv, iv); aid++; } @@ -1297,14 +1299,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; - emit_interp(pc, iv, iv, iv, FALSE); - tmp = alloc_temp(pc, NULL); - emit_flop(pc, 0, tmp, iv); - emit_interp(pc, &a[0], &a[0], tmp, TRUE); - emit_interp(pc, &a[1], &a[1], tmp, TRUE); - emit_interp(pc, &a[2], &a[2], tmp, TRUE); - emit_interp(pc, &a[3], &a[3], tmp, TRUE); - free_temp(pc, tmp); + emit_interp(pc, &a[0], &a[0], iv, TRUE); + emit_interp(pc, &a[1], &a[1], iv, TRUE); + emit_interp(pc, &a[2], &a[2], iv, TRUE); + emit_interp(pc, &a[3], &a[3], iv, TRUE); } if (iv) -- cgit v1.2.3 From da66b8a2f4c3c052ad71b2b6d5a845c2fd267c6e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 15 Jun 2008 15:53:22 +1000 Subject: nv50: disable inline IMMD for now, IMMD+pred == BANG! fixes progs/fp/lit.txt --- src/gallium/drivers/nv50/nv50_program.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5800a347bf..845fc61190 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -31,10 +31,6 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * - * MOV dst, -src - * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) - * mov dst, tmp - * * Things to check with renouveau: * FP attr/result assignment - how? * attrib @@ -349,7 +345,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_dst(pc, dst, e); - if (dst->type != P_RESULT && src->type == P_IMMD) { + if (0 && dst->type != P_RESULT && src->type == P_IMMD) { set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs -- cgit v1.2.3 From fea0b1651677444fc6c135e1a4b8ab6463a9fdf9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 16 Jun 2008 12:55:53 +1000 Subject: nv50: a couple more bits'n'pieces --- src/gallium/drivers/nv50/nv50_program.c | 10 +++++++++- src/gallium/drivers/nv50/nv50_program.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 845fc61190..2fe60ad51f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -93,6 +93,11 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { int i; + if (reg->type == P_RESULT) { + if (pc->p->cfg.high_result < (reg->hw + 1)) + pc->p->cfg.high_result = reg->hw + 1; + } + if (reg->type != P_TEMP) return; @@ -1558,6 +1563,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_method(so, tesla, 0x1650, 2); so_data (so, p->cfg.vp.attr[0]); so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, p->cfg.high_result); so_method(so, tesla, 0x16ac, 2); so_data (so, 8); so_data (so, p->cfg.high_temp); @@ -1594,9 +1601,10 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, 0x00000004); so_data (so, 0x00000000); so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */ + so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ so_data (so, 0x03020100); so_data (so, 0x07060504); + so_data (so, 0x0b0a0908); so_method(so, tesla, 0x1988, 2); so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d143ae9797..d643e8db21 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -35,6 +35,7 @@ struct nv50_program { struct { unsigned high_temp; + unsigned high_result; struct { unsigned attr[2]; } vp; -- cgit v1.2.3 From cae38d0fcc6c936d3a4dc25ca2dbef3d106d05a5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 16 Jun 2008 16:29:40 +1000 Subject: nv50: abuse constbuf upload for program upload --- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_program.c | 50 +++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5214ce1a69..cbb473410a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -30,6 +30,7 @@ #define NV50_CB_PGP 3 #define NV50_CB_TIC 4 #define NV50_CB_TSC 5 +#define NV50_CB_PUPLOAD 6 #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2fe60ad51f..2cf6275730 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1494,8 +1494,10 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; struct nv50_program_exec *e; + struct nouveau_stateobj *so; + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + unsigned start, count, *up, *ptr; boolean upload = FALSE; - unsigned *map; if (!p->buffer) { p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); @@ -1522,20 +1524,44 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return FALSE; - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { -#ifdef NV50_PROGRAM_DUMP - NOUVEAU_ERR("0x%08x\n", e->inst[0]); -#endif - *(map++) = e->inst[0]; - if (is_long(e)) { -#ifdef NV50_PROGRAM_DUMP - NOUVEAU_ERR("0x%08x\n", e->inst[1]); -#endif - *(map++) = e->inst[1]; + *(ptr++) = e->inst[0]; + if (is_long(e)) + *(ptr++) = e->inst[1]; + } + + so = so_new(3,2); + so_method(so, nv50->screen->tesla, 0x1280, 3); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + + start = 0; count = p->exec_size; + while (count) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + unsigned nr; + + so_emit(nvws, so); + + nr = MIN2(count, 2047); + nr = MIN2(nvws->channel->pushbuf->remaining, nr); + if (nvws->channel->pushbuf->remaining < (nr + 3)) { + FIRE_RING(NULL); + continue; } + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (up + start, nr); + + start += nr; + count -= nr; } - ws->buffer_unmap(ws, p->buffer); + + FREE(up); + so_ref(NULL, &so); } void -- cgit v1.2.3 From bcbe6baac37915563bc120ad558cd930bc1ddec1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 16 Jun 2008 18:03:29 +1000 Subject: nv50: hacks for stuff I don't really get yet --- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2cf6275730..fa5e24d3e9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1592,7 +1592,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_method(so, tesla, 0x16b8, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, 0x16ac, 2); - so_data (so, 8); + so_data (so, p->cfg.high_result); //8); so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ @@ -1632,7 +1632,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, 0x07060504); so_data (so, 0x0b0a0908); so_method(so, tesla, 0x1988, 2); - so_data (so, 0x08040404); /* p: 0x0f000401 */ + so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ -- cgit v1.2.3 From 431504b99cd55948522e86a249e656e78598ddbd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 16 Jun 2008 18:56:39 +1000 Subject: nv50: hack of a TEX opcode --- src/gallium/drivers/nv50/nv50_program.c | 19 +++++++++++++++++++ src/gallium/drivers/nv50/nv50_screen.c | 5 +++++ 2 files changed, 24 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fa5e24d3e9..21945410b1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1134,6 +1134,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: + { + struct nv50_reg *t0, *t1; + struct nv50_program_exec *e; + + t0 = alloc_temp(pc, NULL); + t0 = alloc_temp(pc, NULL); + t1 = alloc_temp(pc, NULL); + emit_mov(pc, t0, src[0][0]); + emit_mov(pc, t1, src[0][1]); + + e = exec(pc); + e->inst[0] = 0xf0400000; + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t0, e); + emit(pc, e); + free_temp(pc, t0); + free_temp(pc, t1); + } break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index b9cecb77f1..8affb0f073 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -289,6 +289,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, fui(0.0)); so_data (so, fui(1.0)); + so_method(so, screen->tesla, 0x1234, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x1458, 1); + so_data (so, 1); + so_emit(nvws, so); so_ref(NULL, &so); nvws->push_flush(nvws, 0, NULL); -- cgit v1.2.3 From bb9efb5534a652878161e28bd73039eff5b11014 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 16 Jun 2008 22:24:16 +1000 Subject: nv50: separate state validation and upload, similar to nv40 --- src/gallium/drivers/nv50/nv50_context.h | 21 +++++++ src/gallium/drivers/nv50/nv50_program.c | 6 +- src/gallium/drivers/nv50/nv50_state_validate.c | 80 ++++++++++++++++++++------ src/gallium/drivers/nv50/nv50_vbo.c | 6 +- 4 files changed, 86 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbb473410a..3e0e523ee3 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -74,6 +74,25 @@ nv50_miptree(struct pipe_texture *pt) return (struct nv50_miptree *)pt; } +struct nv50_state { + unsigned dirty; + + struct nouveau_stateobj *fb; + struct nouveau_stateobj *blend; + struct nouveau_stateobj *blend_colour; + struct nouveau_stateobj *zsa; + struct nouveau_stateobj *rast; + struct nouveau_stateobj *stipple; + struct nouveau_stateobj *scissor; + struct nouveau_stateobj *viewport; + struct nouveau_stateobj *tsc_upload; + struct nouveau_stateobj *tic_upload; + struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vtxfmt; + struct nouveau_stateobj *vtxbuf; +}; + struct nv50_context { struct pipe_context pipe; @@ -82,6 +101,8 @@ struct nv50_context { struct draw_context *draw; + struct nv50_state state; + unsigned dirty; struct nv50_blend_stateobj *blend; struct nv50_zsa_stateobj *zsa; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 21945410b1..7bb2f454bb 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1615,8 +1615,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.vertprog); } void @@ -1655,8 +1654,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.fragprog); } void diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 9d93c04da7..566f95f682 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -103,14 +103,59 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, 0); so_data (so, h); - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; + + if (nv50->pctx_id != screen->cur_pctx) { + nv50->state.dirty |= 0xffffffff; + screen->cur_pctx = nv50->pctx_id; + } + + if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) + so_emit(nvws, nv50->state.fb); + if (nv50->state.dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->state.blend); + if (nv50->state.dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->state.zsa); + if (nv50->state.dirty & NV50_NEW_VERTPROG) + so_emit(nvws, nv50->state.vertprog); + if (nv50->state.dirty & NV50_NEW_FRAGPROG) + so_emit(nvws, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->state.rast); + if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) + so_emit(nvws, nv50->state.blend_colour); + if (nv50->state.dirty & NV50_NEW_STIPPLE) + so_emit(nvws, nv50->state.stipple); + if (nv50->state.dirty & NV50_NEW_SCISSOR) + so_emit(nvws, nv50->state.scissor); + if (nv50->state.dirty & NV50_NEW_VIEWPORT) + so_emit(nvws, nv50->state.viewport); + if (nv50->state.dirty & NV50_NEW_SAMPLER) + so_emit(nvws, nv50->state.tsc_upload); + if (nv50->state.dirty & NV50_NEW_TEXTURE) + so_emit(nvws, nv50->state.tic_upload); + if (nv50->state.dirty & NV50_NEW_ARRAYS) { + so_emit(nvws, nv50->state.vtxfmt); + so_emit(nvws, nv50->state.vtxbuf); + } + nv50->state.dirty = 0; + + so_emit_reloc_markers(nvws, nv50->state.fb); + so_emit_reloc_markers(nvws, nv50->state.vertprog); + so_emit_reloc_markers(nvws, nv50->state.fragprog); + so_emit_reloc_markers(nvws, nv50->state.vtxbuf); } boolean nv50_state_validate(struct nv50_context *nv50) { - struct nouveau_winsys *nvws = nv50->screen->nvws; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so; unsigned i; @@ -119,10 +164,10 @@ nv50_state_validate(struct nv50_context *nv50) nv50_state_validate_fb(nv50); if (nv50->dirty & NV50_NEW_BLEND) - so_emit(nvws, nv50->blend->so); + so_ref(nv50->blend->so, &nv50->state.blend); if (nv50->dirty & NV50_NEW_ZSA) - so_emit(nvws, nv50->zsa->so); + so_ref(nv50->zsa->so, &nv50->state.zsa); if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) nv50_vertprog_validate(nv50); @@ -131,7 +176,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50_fragprog_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) - so_emit(nvws, nv50->rasterizer->so); + so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { so = so_new(5, 0); @@ -140,8 +185,7 @@ nv50_state_validate(struct nv50_context *nv50) so_data (so, fui(nv50->blend_colour.color[1])); so_data (so, fui(nv50->blend_colour.color[2])); so_data (so, fui(nv50->blend_colour.color[3])); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.blend_colour); } if (nv50->dirty & NV50_NEW_STIPPLE) { @@ -149,8 +193,7 @@ nv50_state_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv50->stipple.stipple[i]); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.stipple); } if (nv50->dirty & NV50_NEW_SCISSOR) { @@ -160,8 +203,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->scissor.minx); so_data (so, (nv50->scissor.maxy << 16) | nv50->scissor.miny); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.scissor); } if (nv50->dirty & NV50_NEW_VIEWPORT) { @@ -174,8 +216,7 @@ nv50_state_validate(struct nv50_context *nv50) so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(-nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.viewport); } if (nv50->dirty & NV50_NEW_SAMPLER) { @@ -187,8 +228,7 @@ nv50_state_validate(struct nv50_context *nv50) so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); for (i = 0; i < nv50->sampler_nr; i++) so_datap (so, nv50->sampler[i], 8); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.tsc_upload); } if (nv50->dirty & NV50_NEW_TEXTURE) { @@ -213,15 +253,17 @@ nv50_state_validate(struct nv50_context *nv50) so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); } - - so_emit(nvws, so); - so_ref(NULL, &so); + + so_ref(so, &nv50->state.tic_upload); } if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); + nv50->state.dirty |= nv50->dirty; nv50->dirty = 0; + nv50_state_emit(nv50); + return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 7a1c2f24ef..3f0b66ae36 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -208,9 +208,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_LOW, 0, 0); } - so_emit(nv50->screen->nvws, vtxfmt); - so_ref (NULL, &vtxfmt); - so_emit(nv50->screen->nvws, vtxbuf); - so_ref (NULL, &vtxbuf); + so_ref (vtxfmt, &nv50->state.vtxfmt); + so_ref (vtxbuf, &nv50->state.vtxbuf); } -- cgit v1.2.3 From 598b2a51052913521e3059cdef7cf0c66a5adb90 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 17 Jun 2008 01:36:36 +1000 Subject: nv50: make TEX a halfie --- src/gallium/drivers/nv50/nv50_program.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7bb2f454bb..5ef0954239 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1145,9 +1145,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400000; - set_long(pc, e); - e->inst[1] |= 0x0000c004; + e->inst[0] = 0xf0400100; set_dst(pc, t0, e); emit(pc, e); free_temp(pc, t0); -- cgit v1.2.3 From fd7412a7f1beab8b81ce307b1054331eee102e8b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 17 Jun 2008 01:51:13 +1000 Subject: nv50: some people are just born stupid.. really.. --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5ef0954239..ba60b8c533 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1135,21 +1135,33 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_TEX: { - struct nv50_reg *t0, *t1; + struct nv50_reg *t0, *t1, *t2, *t3; struct nv50_program_exec *e; t0 = alloc_temp(pc, NULL); t0 = alloc_temp(pc, NULL); t1 = alloc_temp(pc, NULL); + t2 = alloc_temp(pc, NULL); + t3 = alloc_temp(pc, NULL); emit_mov(pc, t0, src[0][0]); emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400100; + e->inst[0] = 0xf0400000; + set_long(pc, e); + e->inst[1] |= 0x0000c004; set_dst(pc, t0, e); emit(pc, e); + + if (mask & (1 << 0)) emit_mov(pc, dst[0], t0); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t1); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t2); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t3); + free_temp(pc, t0); free_temp(pc, t1); + free_temp(pc, t2); + free_temp(pc, t3); } break; case TGSI_OPCODE_XPD: -- cgit v1.2.3 From 5a3ea9ee59ac586955f7784eb25e7fd70d0c8882 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 18 Jun 2008 13:23:00 +1000 Subject: nv50: simplify emit_interp a bit --- src/gallium/drivers/nv50/nv50_program.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ba60b8c533..3248f2aa3c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -302,18 +302,19 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static void emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, - struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) + struct nv50_reg *src, struct nv50_reg *iv) { struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x80000000; set_dst(pc, dst, e); - alloc_reg(pc, iv); - e->inst[0] |= (iv->hw << 9); alloc_reg(pc, src); e->inst[0] |= (src->hw << 16); - if (noperspective) + if (iv) { e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } emit(pc, e); } @@ -1147,7 +1148,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400000; + e->inst[0] = 0xf6400000; set_long(pc, e); e->inst[1] |= 0x0000c004; set_dst(pc, t0, e); @@ -1302,7 +1303,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); - emit_interp(pc, iv, iv, iv, FALSE); + emit_interp(pc, iv, iv, NULL); emit_flop(pc, 0, iv, iv); aid++; } @@ -1329,10 +1330,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; - emit_interp(pc, &a[0], &a[0], iv, TRUE); - emit_interp(pc, &a[1], &a[1], iv, TRUE); - emit_interp(pc, &a[2], &a[2], iv, TRUE); - emit_interp(pc, &a[3], &a[3], iv, TRUE); + emit_interp(pc, &a[0], &a[0], iv); + emit_interp(pc, &a[1], &a[1], iv); + emit_interp(pc, &a[2], &a[2], iv); + emit_interp(pc, &a[3], &a[3], iv); } if (iv) -- cgit v1.2.3 From e90130257527aff43f807ae16d802c5515d29e8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 23 Jun 2008 23:42:53 +1000 Subject: nv50: remove some debug --- src/gallium/drivers/nv50/nv50_program.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3248f2aa3c..148de3b7ea 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -883,8 +883,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) unsigned mask, sat; int i, c; - NOUVEAU_ERR("insn %p\n", tok); - mask = inst->FullDstRegisters[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; @@ -1277,7 +1275,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d temps\n", pc->temp_nr); if (pc->temp_nr) { pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); if (!pc->temp) @@ -1292,7 +1289,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { struct nv50_reg *iv = NULL; int aid = 0; @@ -1340,7 +1336,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) free_temp(pc, iv); } - NOUVEAU_ERR("%d result regs\n", pc->result_nr); if (pc->result_nr) { int rid = 0; @@ -1362,7 +1357,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d param regs\n", pc->param_nr); if (pc->param_nr) { int rid = 0; -- cgit v1.2.3 From e002ad77398fbe14a0efbd91824c3325ca09b4c1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 25 Jun 2008 04:57:27 +1000 Subject: nv50: whoops --- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 148de3b7ea..bc45fe0bdf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1555,7 +1555,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) *(ptr++) = e->inst[1]; } - so = so_new(3,2); + so = so_new(4,2); so_method(so, nv50->screen->tesla, 0x1280, 3); so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); @@ -1604,7 +1604,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(11, 2); + so = so_new(13, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); -- cgit v1.2.3 From bf94027fdde51aed476e9bfdd4326aa9040440b0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 29 Jun 2008 15:59:24 +1000 Subject: nv50: fixes after rebase + commits note on the code that was just pushed. OK, seems a lot of people have been getting the idea that nouveau is dying lately - I decided to commit some of the work I've been doing lately to prove them wrong :) Progress on my side is slow due to lack of time mainly, but I'm still around. Firstly, don't even bother trying to use gallium on G8x/G9x yet, it won't work. I've deliberately left all the necessary winsys changes out of the commits for a very good reason - I don't know what we're going to need from the DRM exactly yet and don't want to be continually breaking interfaces as I discover additional requirements. On my side, I think I've gone through about 3 different DRM interface changes, and have just discovered that I may need more yet. It'd be very annoying for everyone who uses nouveau to keep things in sync. Once I've got it sorted - I'll commit a lot of cool stuff. Stay tuned. Also, don't look at the shader code.. it's horribly nasty and full of hacks, I used it as an opportunity to learn G8x GPU programs at the same time. New semi-decent code is in works, and will follow at some point. :) --- src/gallium/drivers/nv50/nv50_miptree.c | 2 +- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- src/gallium/winsys/dri/nouveau/nouveau_screen.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index ec6e09aea0..f936a1f0e2 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -26,7 +26,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) break; } - pitch = ((pt->width[0] + 63) & ~63) * pt->cpp; + pitch = ((pt->width[0] + 63) & ~63) * pt->block.size; mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]); if (!mt->buffer) { diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc45fe0bdf..cd6967b366 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1240,7 +1240,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) unsigned last; d = &p.FullToken.FullDeclaration; - last = d->u.DeclarationRange.Last; + last = d->DeclarationRange.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: @@ -1546,7 +1546,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } if (!upload) - return FALSE; + return; up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 665e3cc80d..743de089d6 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -121,9 +121,9 @@ nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, int i; for (i = 0; i < nr; i++) - pipe_texture_reference(&nv50->miptree[i], pt[i]); + pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); for (i = nr; i < nv50->miptree_nr; i++) - pipe_texture_reference(&nv50->miptree[i], NULL); + pipe_texture_reference((void *)&nv50->miptree[i], NULL); nv50->miptree_nr = nr; nv50->dirty |= NV50_NEW_TEXTURE; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c index b15ee7509c..df1fe7e69b 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -13,7 +13,7 @@ #include "nouveau_screen.h" #include "nouveau_swapbuffers.h" -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 10 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 #error nouveau_drm.h version does not match expected version #endif -- cgit v1.2.3 From 861629d1fd4a1d256c913470c33d9522e83d615d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 11 Jul 2008 00:15:01 +1000 Subject: nv50: more "abuse" by using libc malloc etc.. --- src/gallium/drivers/nv50/nv50_context.c | 2 +- src/gallium/drivers/nv50/nv50_draw.c | 2 +- src/gallium/drivers/nv50/nv50_program.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index a225c4bf72..2494784a94 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -21,7 +21,7 @@ nv50_destroy(struct pipe_context *pipe) struct nv50_context *nv50 = (struct nv50_context *)pipe; draw_destroy(nv50->draw); - free(nv50); + FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index d185d99950..2b0c0384a3 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -46,7 +46,7 @@ nv50_render_reset_stipple_counter(struct draw_stage *stage) static void nv50_render_destroy(struct draw_stage *stage) { - free(stage); + FREE(stage); } struct draw_stage * diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cd6967b366..ff81719f03 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -180,8 +180,8 @@ kill_temp_temp(struct nv50_pc *pc) static int ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) { - pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * - sizeof(float)); + pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), + (pc->immd_nr + 1) * 4 * sizeof(float)); pc->immd_buf[(pc->immd_nr * 4) + 0] = x; pc->immd_buf[(pc->immd_nr * 4) + 1] = y; pc->immd_buf[(pc->immd_nr * 4) + 2] = z; @@ -1276,7 +1276,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->temp_nr) { - pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); + pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); if (!pc->temp) goto out_err; @@ -1293,7 +1293,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct nv50_reg *iv = NULL; int aid = 0; - pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); + pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); if (!pc->attr) goto out_err; @@ -1339,7 +1339,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->result_nr) { int rid = 0; - pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); + pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); if (!pc->result) goto out_err; @@ -1360,7 +1360,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->param_nr) { int rid = 0; - pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); + pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); if (!pc->param) goto out_err; @@ -1376,7 +1376,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->immd_nr) { int rid = pc->param_nr * 4; - pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); + pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) goto out_err; -- cgit v1.2.3 From 857a3294a959015bf893241199f7fd7f7882a6ab Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 11 Jul 2008 20:44:39 +1000 Subject: nv50: add license headers to .c files --- src/gallium/drivers/nv50/nv50_clear.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_context.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_draw.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_miptree.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_program.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_query.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_screen.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_state.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_state_validate.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_surface.c | 46 +++++++++++--------------- src/gallium/drivers/nv50/nv50_vbo.c | 22 ++++++++++++ 11 files changed, 240 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index f35087b37e..fbc6cd09ce 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 2494784a94..07987c7d02 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index 2b0c0384a3..4fd81bd27a 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "draw/draw_pipe.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index f936a1f0e2..8b61ca2a1f 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ff81719f03..2668acf299 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index d8c3491c2c..26bd90ccc5 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 9f1fba6e42..ec6d34e684 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_screen.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 743de089d6..5c364c4ddf 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index bd1f048f87..5c75a242cc 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index acd7b501ef..6229b63626 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -1,30 +1,24 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * +/* + * Copyright 2008 Ben Skeggs + * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ #include "nv50_context.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 0318b9b550..8e420ad172 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_state.h" #include "pipe/p_util.h" -- cgit v1.2.3 From 7c745de74997e859d7e2640092bda9ad900e28a9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 11 Jul 2008 21:19:41 +1000 Subject: nv50: add some texture formats --- src/gallium/drivers/nv50/nv50_program.c | 2 + src/gallium/drivers/nv50/nv50_screen.c | 6 +++ src/gallium/drivers/nv50/nv50_tex.c | 93 ++++++++++++++++++++++++++++----- src/gallium/drivers/nv50/nv50_texture.h | 2 + 4 files changed, 90 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2668acf299..0d3ddb8a59 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1701,6 +1701,8 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) if (p->buffer) pipe_buffer_reference(ws, &p->buffer, NULL); + nv50->screen->nvws->res_free(&p->data); + p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ec6d34e684..bba5ca4d4e 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -51,7 +51,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_TEXTURE: switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: return TRUE; default: break; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 581a9e2ffb..fde3c97c05 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,6 +25,81 @@ #include "nouveau/nouveau_stateobj.h" +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ + switch (mt->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_1_5_5_5); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_4_4_4_4); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_5_6_5); + break; + case PIPE_FORMAT_L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_I8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8); + break; + default: + return 1; + } + + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + + return 0; +} + void nv50_tex_validate(struct nv50_context *nv50) { @@ -37,19 +112,11 @@ nv50_tex_validate(struct nv50_context *nv50) so_data (so, NV50_CB_TIC); so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); for (i = 0; i < nv50->miptree_nr; i++) { - struct nv50_miptree *mt = nv50->miptree[i]; - - so_data (so, 0x2a712488); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0xd0005000); - so_data (so, 0x00300000); - so_data (so, mt->base.width[0]); - so_data (so, (mt->base.depth[0] << 16) | - mt->base.height[0]); - so_data (so, 0x03000000); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_HIGH, 0, 0); + if (nv50_tex_construct(so, nv50->miptree[i])) { + NOUVEAU_ERR("failed tex validate\n"); + so_ref(NULL, &so); + return; + } } so_ref(so, &nv50->state.tic_upload); diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index c8fb282f21..6861d67b4d 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -45,6 +45,8 @@ #define NV50TIC_0_0_TYPEB_UNORM 0x00000080 #define NV50TIC_0_0_FMT_MASK 0x0000003c #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 #define NV50TIC_0_0_FMT_5_6_5 0x00000015 #define NV50TIC_0_0_FMT_8_8 0x00000018 #define NV50TIC_0_0_FMT_8 0x0000001d -- cgit v1.2.3 From fda01b584715c05696a0e6768fda669ef1eb5f3b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Aug 2008 11:26:17 +1000 Subject: nouveau: fix build --- src/gallium/drivers/nv04/nv04_fragprog.c | 4 ++-- src/gallium/drivers/nv04/nv04_state.c | 2 +- src/gallium/drivers/nv04/nv04_state.h | 2 +- src/gallium/drivers/nv10/nv10_fragprog.c | 4 ++-- src/gallium/drivers/nv10/nv10_state.c | 2 +- src/gallium/drivers/nv10/nv10_state.h | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 4 ++-- src/gallium/drivers/nv30/nv30_state.c | 2 +- src/gallium/drivers/nv30/nv30_state.h | 2 +- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 4 ++-- src/gallium/drivers/nv40/nv40_state.c | 2 +- src/gallium/drivers/nv40/nv40_state.h | 2 +- src/gallium/drivers/nv40/nv40_vertprog.c | 4 ++-- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- src/gallium/drivers/nv50/nv50_program.h | 2 +- src/gallium/drivers/nv50/nv50_state.c | 2 +- 17 files changed, 23 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c index 11f4360ed8..215974eec0 100644 --- a/src/gallium/drivers/nv04/nv04_fragprog.c +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -4,8 +4,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv04_context.h" diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 7e71dee7b5..668d875671 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -4,7 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv04_context.h" #include "nv04_state.h" diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 39f7cd17b3..399f750dbe 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -2,7 +2,7 @@ #define __NV04_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv04_blend_state { uint32_t b_enable; diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c index 2a63c8a704..137de9d53e 100644 --- a/src/gallium/drivers/nv10/nv10_fragprog.c +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -4,8 +4,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv10_context.h" diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 43b9c32f12..f902fd54b6 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -4,7 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv10_context.h" #include "nv10_state.h" diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index f1f9a12110..3a3fd0d4f4 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -2,7 +2,7 @@ #define __NV10_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv10_blend_state { uint32_t b_enable; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 59e79d66f2..68058264e3 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv30_context.h" diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index ba02413de5..8d88d6c806 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -3,7 +3,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv30_context.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index 20c5176160..e6f23bf166 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -2,7 +2,7 @@ #define __NV30_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv30_sampler_state { uint32_t fmt; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 39852ce983..6c075cdb75 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -3,7 +3,7 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv30_context.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 428348c338..a361509e8f 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv40_context.h" diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 5d2c3ab881..63d0ecc915 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -5,7 +5,7 @@ #include "draw/draw_context.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv40_context.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index 2b4225deb2..8a9d8c8fdf 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -2,7 +2,7 @@ #define __NV40_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv40_sampler_state { uint32_t fmt; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 1e486a66ef..ff988e6a5f 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv40_context.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0d3ddb8a59..d6fbdd1824 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -26,8 +26,8 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d643e8db21..78deed6a38 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -2,7 +2,7 @@ #define __NV50_PROGRAM_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv50_program_exec { struct nv50_program_exec *next; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 731409bed4..4055527d9f 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -25,7 +25,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv50_context.h" #include "nv50_texture.h" -- cgit v1.2.3 From 3f66b72fdb4834c5211305698d22806eac80aa35 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Jan 2009 13:38:47 +1100 Subject: nv50: ensure we actually get contiguous regs for TEX insn. Still many more horrible things to fix here... --- src/gallium/drivers/nv50/nv50_program.c | 59 +++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d6fbdd1824..566d18cef4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -179,6 +179,38 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } +static int +alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) +{ + int i; + + if ((idx + 4) >= NV50_SU_MAX_TEMP) + return 1; + + if (pc->r_temp[idx] || pc->r_temp[idx + 1] || + pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) + return alloc_temp4(pc, dst, idx + 1); + + for (i = 0; i < 4; i++) { + dst[i] = CALLOC_STRUCT(nv50_reg); + dst[i]->type = P_TEMP; + dst[i]->index = -1; + dst[i]->hw = idx + i; + pc->r_temp[idx + i] = dst[i]; + } + + return 0; +} + +static void +free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) +{ + int i; + + for (i = 0; i < 4; i++) + free_temp(pc, reg[i]); +} + static struct nv50_reg * temp_temp(struct nv50_pc *pc) { @@ -1156,33 +1188,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_TEX: { - struct nv50_reg *t0, *t1, *t2, *t3; + struct nv50_reg *t[4]; struct nv50_program_exec *e; - t0 = alloc_temp(pc, NULL); - t0 = alloc_temp(pc, NULL); - t1 = alloc_temp(pc, NULL); - t2 = alloc_temp(pc, NULL); - t3 = alloc_temp(pc, NULL); - emit_mov(pc, t0, src[0][0]); - emit_mov(pc, t1, src[0][1]); + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); e = exec(pc); e->inst[0] = 0xf6400000; set_long(pc, e); e->inst[1] |= 0x0000c004; - set_dst(pc, t0, e); + set_dst(pc, t[0], e); emit(pc, e); - if (mask & (1 << 0)) emit_mov(pc, dst[0], t0); - if (mask & (1 << 1)) emit_mov(pc, dst[1], t1); - if (mask & (1 << 2)) emit_mov(pc, dst[2], t2); - if (mask & (1 << 3)) emit_mov(pc, dst[3], t3); + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); - free_temp(pc, t0); - free_temp(pc, t1); - free_temp(pc, t2); - free_temp(pc, t3); + free_temp4(pc, t); } break; case TGSI_OPCODE_XPD: -- cgit v1.2.3 From 0d54770cabbe034b0f07ab1b211c374d92ce19d4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Jan 2009 13:41:12 +1100 Subject: nv50: indentation for TEX is a little overenthusiastic --- src/gallium/drivers/nv50/nv50_program.c | 41 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 566d18cef4..b205cdbaca 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1187,28 +1187,28 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: - { - struct nv50_reg *t[4]; - struct nv50_program_exec *e; + { + struct nv50_reg *t[4]; + struct nv50_program_exec *e; - alloc_temp4(pc, t, 0); - emit_mov(pc, t[0], src[0][0]); - emit_mov(pc, t[1], src[0][1]); + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); - e = exec(pc); - e->inst[0] = 0xf6400000; - set_long(pc, e); - e->inst[1] |= 0x0000c004; - set_dst(pc, t[0], e); - emit(pc, e); + e = exec(pc); + e->inst[0] = 0xf6400000; + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t[0], e); + emit(pc, e); - if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); - if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); - if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); - if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); - free_temp4(pc, t); - } + free_temp4(pc, t); + } break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); @@ -1595,8 +1595,13 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; + NOUVEAU_ERR("-------\n"); up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + *(ptr++) = e->inst[0]; if (is_long(e)) *(ptr++) = e->inst[1]; -- cgit v1.2.3 From 5696267efd6f85d79f5fe511d1a066a17c4d1ccc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Jan 2009 14:06:43 +1100 Subject: nv50: add TXP to TEX case.. not correct, but anyway.. --- src/gallium/drivers/nv50/nv50_program.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b205cdbaca..147a98aca0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1187,6 +1187,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: { struct nv50_reg *t[4]; struct nv50_program_exec *e; -- cgit v1.2.3 From 00b15c9f40944d94aa28a441edd7ebb51577d9ba Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Jan 2009 15:56:19 +1100 Subject: nv50: fix crash in nv50_program_destroy --- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 147a98aca0..8414d06d18 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1718,7 +1718,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { - struct pipe_winsys *ws = nv50->pipe.winsys; + struct pipe_screen *pscreen = nv50->pipe.screen; while (p->exec_head) { struct nv50_program_exec *e = p->exec_head; @@ -1730,7 +1730,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) p->exec_size = 0; if (p->buffer) - pipe_buffer_reference(ws, &p->buffer, NULL); + pipe_buffer_reference(pscreen, &p->buffer, NULL); nv50->screen->nvws->res_free(&p->data); -- cgit v1.2.3 From cd5d3fde13e424373feac9098453ed0ca7f6e4eb Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Jan 2009 16:04:41 +1100 Subject: nv50: fill image unit index in TEX varients --- src/gallium/drivers/nv50/nv50_program.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 8414d06d18..d66e1d0949 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -934,7 +934,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; - unsigned mask, sat; + unsigned mask, sat, unit; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; @@ -948,8 +948,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + + if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) + unit = fs->SrcRegister.Index; + for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); + src[i][c] = tgsi_src(pc, c, fs); } if (sat) { @@ -1198,6 +1203,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) e = exec(pc); e->inst[0] = 0xf6400000; + e->inst[0] |= (unit << 9); set_long(pc, e); e->inst[1] |= 0x0000c004; set_dst(pc, t[0], e); -- cgit v1.2.3 From b01d0077af9d93c582e5f53ebd358ac8148b22df Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 12 Jan 2009 14:26:15 +1000 Subject: nv50: disable shader debug --- src/gallium/drivers/nv50/nv50_program.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d66e1d0949..bc85ede92e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -32,7 +32,7 @@ #include "nv50_context.h" #define NV50_SU_MAX_TEMP 64 -#define NV50_PROGRAM_DUMP +//#define NV50_PROGRAM_DUMP /* ARL - gallium craps itself on progs/vp/arl.txt * @@ -1602,13 +1602,19 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; +#ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { NOUVEAU_ERR("0x%08x\n", e->inst[0]); if (is_long(e)) NOUVEAU_ERR("0x%08x\n", e->inst[1]); + } + +#endif + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { *(ptr++) = e->inst[0]; if (is_long(e)) *(ptr++) = e->inst[1]; -- cgit v1.2.3 From d2442016afdc5e3b12b04d912f005ab183f7b8ff Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 13 Jan 2009 09:56:40 +1000 Subject: nv50: implement KIL enough for progs/fp/kil to work --- src/gallium/drivers/nv50/nv50_program.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc85ede92e..5537a47902 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -841,6 +841,28 @@ emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, e); } +static void +emit_kil(struct nv50_pc *pc, struct nv50_reg *src) +{ + struct nv50_program_exec *e; + const int r_pred = 1; + + /* Sets predicate reg ? */ + e = exec(pc); + e->inst[0] = 0xa00001fd; + e->inst[1] = 0xc4014788; + set_src_0(pc, src, e); + set_pred_wr(pc, 1, r_pred, e); + emit(pc, e); + + /* This is probably KILP */ + e = exec(pc); + e->inst[0] = 0x000001fe; + set_long(pc, e); + set_pred(pc, 1 /* LT? */, r_pred, e); + emit(pc, e); +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -1069,6 +1091,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_KIL: + emit_kil(pc, src[0][0]); + emit_kil(pc, src[0][1]); + emit_kil(pc, src[0][2]); + emit_kil(pc, src[0][3]); + break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; -- cgit v1.2.3 From 8337c78d91612d615a1368ee8ee188d80574fad4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 13 Jan 2009 12:49:53 +1000 Subject: nv50: change some magic reg, makes more things work No real idea what this does.. but a lot of things that misrendered and made the GPU throw a DATA_ERROR now work.. I'm wondering what side-effects we'll see from this :) --- src/gallium/drivers/nv50/nv50_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5537a47902..7686f746eb 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1739,7 +1739,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, 0x1904, 4); - so_data (so, 0x01040404); /* p: 0x01000404 */ + so_data (so, 0x00040404); /* p: 0x01000404 */ so_data (so, 0x00000004); so_data (so, 0x00000000); so_data (so, 0x00000000); -- cgit v1.2.3 From 5069bfed29bcee2c89c36c74c6d65d388eb7792e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 2 Feb 2009 23:47:16 -0500 Subject: gallium: remove pipe_buffer from surfaces this change disassociates, at least from the driver perspective, the surface from buffer. surfaces are technically now views on the textures so make it so by hiding the buffer in the internals of textures. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 36 +++++++++++----------- src/gallium/drivers/i915simple/i915_screen.c | 6 ++-- src/gallium/drivers/i915simple/i915_state_emit.c | 22 ++++++++----- src/gallium/drivers/i915simple/i915_surface.c | 9 ++++-- src/gallium/drivers/i915simple/i915_texture.c | 2 -- src/gallium/drivers/i965simple/brw_misc_state.c | 4 +-- src/gallium/drivers/i965simple/brw_surface.c | 9 ++++-- src/gallium/drivers/i965simple/brw_tex_layout.c | 1 - .../drivers/i965simple/brw_wm_surface_state.c | 3 +- src/gallium/drivers/nv04/nv04_miptree.c | 2 -- src/gallium/drivers/nv04/nv04_screen.c | 9 +++--- src/gallium/drivers/nv04/nv04_state_emit.c | 9 ++++-- src/gallium/drivers/nv10/nv10_miptree.c | 1 - src/gallium/drivers/nv10/nv10_screen.c | 6 ++-- src/gallium/drivers/nv10/nv10_state_emit.c | 7 +++-- src/gallium/drivers/nv20/nv20_miptree.c | 2 -- src/gallium/drivers/nv20/nv20_screen.c | 6 ++-- src/gallium/drivers/nv20/nv20_state_emit.c | 7 +++-- src/gallium/drivers/nv30/nv30_miptree.c | 2 -- src/gallium/drivers/nv30/nv30_screen.c | 6 ++-- src/gallium/drivers/nv30/nv30_state_fb.c | 16 ++++++---- src/gallium/drivers/nv40/nv40_miptree.c | 2 -- src/gallium/drivers/nv40/nv40_screen.c | 9 ++++-- src/gallium/drivers/nv40/nv40_state_fb.c | 27 ++++++++++------ src/gallium/drivers/nv50/nv50_context.h | 7 +++++ src/gallium/drivers/nv50/nv50_miptree.c | 6 +--- src/gallium/drivers/nv50/nv50_program.c | 2 +- src/gallium/drivers/nv50/nv50_state_validate.c | 8 ++--- src/gallium/drivers/nv50/nv50_surface.c | 4 +-- src/gallium/drivers/softpipe/sp_texture.c | 21 ++++++++----- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/drivers/trace/tr_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - src/gallium/winsys/egl_xlib/egl_xlib.c | 5 +-- src/gallium/winsys/xlib/Makefile | 4 +-- src/gallium/winsys/xlib/xlib_brw_screen.c | 13 ++++++-- src/gallium/winsys/xlib/xlib_cell.c | 4 ++- src/gallium/winsys/xlib/xlib_softpipe.c | 8 +++-- src/mesa/state_tracker/st_cb_fbo.c | 1 - src/mesa/state_tracker/st_gen_mipmap.c | 12 ++++---- 40 files changed, 177 insertions(+), 125 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 301a58ed7b..2b4cdab6cf 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1125,19 +1125,19 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_1d(pt->format, srcSurf->width, srcMap, dstSurf->width, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -1168,11 +1168,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_2d(pt->format, @@ -1181,8 +1181,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf->width, dstSurf->height, dstSurf->stride, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -1212,11 +1212,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); #if 0 @@ -1229,8 +1229,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, (void) reduce_3d; #endif - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 5bb127f3d5..39e48105b3 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -209,7 +209,8 @@ i915_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - char *map = pipe_buffer_map( screen, surface->buffer, flags ); + struct i915_texture *tex = (struct i915_texture *)surface->texture; + char *map = pipe_buffer_map( screen, tex->buffer, flags ); if (map == NULL) return NULL; @@ -228,7 +229,8 @@ static void i915_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - pipe_buffer_unmap( screen, surface->buffer ); + struct i915_texture *tex = (struct i915_texture *)surface->texture; + pipe_buffer_unmap( screen, tex->buffer ); } diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 9bd6f92323..6558cf1c3e 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -213,18 +213,22 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (cbuf_surface) { unsigned cpitch = cbuf_surface->stride; unsigned ctile = BUF_3D_USE_FENCE; - if (cbuf_surface->texture && - ((struct i915_texture*)(cbuf_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + cbuf_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ctile = BUF_3D_TILED_SURFACE; } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | + OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(cpitch) | /* pitch in bytes */ ctile); - OUT_RELOC(cbuf_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, cbuf_surface->offset); } @@ -234,8 +238,12 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (depth_surface) { unsigned zpitch = depth_surface->stride; unsigned ztile = BUF_3D_USE_FENCE; - if (depth_surface->texture && - ((struct i915_texture*)(depth_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + depth_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ztile = BUF_3D_TILED_SURFACE; } @@ -245,7 +253,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) BUF_3D_PITCH(zpitch) | /* pitch in bytes */ ztile); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, depth_surface->offset); } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 5ffdb76682..94e2deaf61 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -74,13 +74,15 @@ i915_surface_copy(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; + struct i915_texture *src_tex = (struct i915_texture *)src->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); i915_copy_blit( i915_context(pipe), do_flip, dst->block.size, - (unsigned short) src->stride, src->buffer, src->offset, - (unsigned short) dst->stride, dst->buffer, dst->offset, + (unsigned short) src->stride, src_tex->buffer, src->offset, + (unsigned short) dst->stride, dst_tex->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } } @@ -102,12 +104,13 @@ i915_surface_fill(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct i915_texture *tex = (struct i915_texture *)dst->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); i915_fill_blit( i915_context(pipe), dst->block.size, (unsigned short) dst->stride, - dst->buffer, dst->offset, + tex->buffer, dst->offset, (short) dstx, (short) dsty, (short) width, (short) height, value ); diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 803ef3a187..b2ca3a2286 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -683,7 +683,6 @@ i915_get_tex_surface(struct pipe_screen *screen, if (ps) { ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -755,7 +754,6 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c index be812c5da9..99ff4403a5 100644 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -223,7 +223,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_BATCH(0); } else { unsigned int format; - + struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; assert(depth_surface->block.width == 1); assert(depth_surface->block.height == 1); switch (depth_surface->block.size) { @@ -246,7 +246,7 @@ static void upload_depthbuffer(struct brw_context *brw) (BRW_TILEWALK_YMAJOR << 26) | // (depth_surface->region->tiled << 27) | (BRW_SURFACE_2D << 29)); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((depth_surface->stride/depth_surface->block.size - 1) << 6) | diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 3159eba2fd..0a95dce194 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -74,13 +74,15 @@ brw_surface_copy(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; + struct brw_texture *src_tex = (struct brw_texture *)src->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_copy_blit(brw_context(pipe), do_flip, dst->block.size, - (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, - (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, + (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height, PIPE_LOGICOP_COPY); } @@ -103,12 +105,13 @@ brw_surface_fill(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *tex = (struct brw_texture *)dst->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_fill_blit(brw_context(pipe), dst->block.size, (short) dst->stride/dst->block.size, - dst->buffer, dst->offset, FALSE, + tex->buffer, dst->offset, FALSE, (short) dstx, (short) dsty, (short) width, (short) height, value); diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 6af0d5cf4b..448229ed4e 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -367,7 +367,6 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 1a326f9918..1bab5bfdb3 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -193,6 +193,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_FRAMEBUFFER */ struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; memset(&surf, 0, sizeof(surf)); @@ -204,7 +205,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.surface_type = BRW_SURFACE_2D; - surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); surf.ss2.width = pipe_surface->width - 1; surf.ss2.height = pipe_surface->height - 1; diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 01cb8ecbf3..0575dc0afc 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -107,7 +107,6 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -138,7 +137,6 @@ nv04_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 2ae6784077..e5e3d4772a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "pipe/p_inlines.h" #include "util/u_simple_screen.h" #include "nv04_context.h" @@ -122,10 +123,10 @@ static void * nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - struct pipe_winsys *ws = screen->winsys; void *map; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = pipe_buffer_map(screen, nv04mt->buffer, flags); if (!map) return NULL; @@ -135,9 +136,9 @@ nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, static void nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - struct pipe_winsys *ws = screen->winsys; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + pipe_buffer_unmap(screen, nv04mt->buffer); } static void diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index 26491758a0..bd8ef1adbf 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -96,6 +96,7 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) struct pipe_surface *rt, *zeta; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv04_miptree *nv04mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -129,14 +130,16 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); OUT_RING(rt_format); - + + nv04mt = (struct nv04_miptree *)rt->texture; /* FIXME pitches have to be aligned ! */ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); OUT_RING(rt->stride|(zeta->stride<<16)); - OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { + nv04mt = (struct nv04_miptree *)zeta->texture; BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index f8c021261b..909278213e 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -114,7 +114,6 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 12516fd71e..2f945a193c 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -122,8 +122,9 @@ nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, { struct pipe_winsys *ws = screen->winsys; void *map; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, nv10mt->buffer, flags); if (!map) return NULL; @@ -134,8 +135,9 @@ static void nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, nv10mt->buffer); } static void diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 46c7e1d753..5dec618b93 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -106,6 +106,7 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) struct pipe_surface *rt, *zeta = NULL; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv10_miptree *nv10mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -147,11 +148,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) OUT_RING (rt->stride | (rt->stride << 16)); } - nv10->rt[0] = rt->buffer; + nv10mt = (struct nv10_miptree *)rt->texture; + nv10->rt[0] = nv10mt->buffer; if (zeta_format) { - nv10->zeta = zeta->buffer; + nv10mt = (struct nv10_miptree *)zeta->texture; + nv10->zeta = nv10mt->buffer; } BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d2038c391d..8e4cc80902 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -106,7 +106,6 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, nv20mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -141,7 +140,6 @@ nv20_miptree_surface_release(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index f09b364b8d..c9171fa178 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -122,8 +122,9 @@ nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, { struct pipe_winsys *ws = screen->winsys; void *map; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, nv20mt->buffer, flags); if (!map) return NULL; @@ -134,8 +135,9 @@ static void nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, nv20mt->buffer); } static void diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index ea20078a50..0f4df9ca31 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -112,6 +112,7 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) struct pipe_surface *rt, *zeta = NULL; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv20_miptree *nv20mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -153,11 +154,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) OUT_RING (rt->stride | (rt->stride << 16)); } - nv20->rt[0] = rt->buffer; + nv20mt = (struct nv20_miptree *)rt->texture; + nv20->rt[0] = nv20mt->buffer; if (zeta_format) { - nv20->zeta = zeta->buffer; + nv20mt = (struct nv20_miptree *)zeta->texture; + nv20->zeta = nv20mt->buffer; } BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 79baac714c..c55756971b 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -143,7 +143,6 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -181,7 +180,6 @@ nv30_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 0f10d914ad..9738436dc4 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -134,6 +134,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_map; void *map; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)surface->texture; if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; @@ -162,7 +163,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + map = ws->buffer_map(ws, nv30mt->buffer, flags); if (!map) return NULL; @@ -174,6 +175,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_unmap; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)surface->texture; /* TODO: Copy from shadow just before push buffer is flushed instead. There are probably some programs that map/unmap excessively @@ -190,7 +192,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + ws->buffer_unmap(ws, nv30mt->buffer); if (surface_to_unmap != surface) { struct nv30_screen *nvscreen = nv30_screen(screen); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 8536acc570..77368cb205 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -12,6 +12,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; + struct nv30_miptree *nv30mt; rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { @@ -77,34 +78,37 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) pitch |= (pitch << 16); } + nv30mt = (struct nv30_miptree *)rt[0]->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); so_data (so, pitch); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + so_reloc (so, nv30mt->buffer, rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + nv30mt = (struct nv30_miptree *)rt[1]->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + so_reloc (so, nv30mt->buffer, rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->stride); } if (zeta_format) { + nv30mt = (struct nv30_miptree *)zeta->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + so_reloc (so, nv30mt->buffer, zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); /* TODO: allocate LMA depth buffer */ } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index ba912ddcbb..b1fba11d2f 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -144,7 +144,6 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -182,7 +181,6 @@ nv40_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 46fe133d71..41d342d27d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -143,6 +143,7 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_map; void *map; + struct nv40_miptree *mt; if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; @@ -170,8 +171,8 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, surface_to_map = surface; assert(surface_to_map); - - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + mt = (struct nv40_miptree *)surface_to_map->texture; + map = ws->buffer_map(ws, mt->buffer, flags); if (!map) return NULL; @@ -183,6 +184,7 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_unmap; + struct nv40_miptree *mt; /* TODO: Copy from shadow just before push buffer is flushed instead. There are probably some programs that map/unmap excessively @@ -199,7 +201,8 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + mt = (struct nv40_miptree *)surface_to_unmap->texture; + ws->buffer_unmap(ws, mt->buffer); if (surface_to_unmap != surface) { struct nv40_screen *nvscreen = nv40_screen(screen); diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index a2e09e18a4..454abad31f 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -1,6 +1,13 @@ #include "nv40_context.h" #include "nouveau/nouveau_util.h" +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surface) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + return mt->buffer; +} + static boolean nv40_state_framebuffer_validate(struct nv40_context *nv40) { @@ -71,33 +78,33 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); so_data (so, rt[0]->stride); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->stride); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); so_data (so, rt[2]->stride); @@ -105,11 +112,11 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); so_data (so, rt[3]->stride); @@ -117,11 +124,11 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (zeta_format) { so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); so_data (so, zeta->stride); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 061a4c064b..6c9e18429a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -99,6 +99,13 @@ nv50_surface(struct pipe_surface *pt) return (struct nv50_surface *)pt; } +static INLINE struct pipe_buffer * +nv50_surface_buffer(struct pipe_surface *surface) +{ + struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture; + return mt->buffer; +} + struct nv50_state { unsigned dirty; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 7770fcc3f2..c6e65c9816 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -179,7 +179,7 @@ nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt, } /* The reverse of the above */ -void +static void nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt, unsigned level, unsigned image) { @@ -232,7 +232,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -253,7 +252,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->offset = 0; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, lvl->image[img]); if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) mark_dirty(lvl->image_dirty_cpu, img); @@ -262,7 +260,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->offset = lvl->image_offset[img]; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); if (flags & PIPE_BUFFER_USAGE_GPU_WRITE) mark_dirty(lvl->image_dirty_gpu, img); @@ -282,7 +279,6 @@ nv50_miptree_surface_del(struct pipe_screen *pscreen, if (--ps->refcount <= 0) { pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(s); } } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7686f746eb..b902c8cf53 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -970,7 +970,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) unit = fs->SrcRegister.Index; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4dc4c04493..602d76ac74 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -46,9 +46,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, fb->cbufs[i]->height); so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); - so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); switch (fb->cbufs[i]->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -81,9 +81,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) } so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); switch (fb->zsbuf->format) { case PIPE_FORMAT_Z24S8_UNORM: diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index ed6602ba36..8ebbc84817 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -65,7 +65,7 @@ nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, { struct pipe_winsys *ws = screen->winsys; - return ws->buffer_map(ws, ps->buffer, flags); + return ws->buffer_map(ws, nv50_surface_buffer(ps), flags); } static void @@ -73,7 +73,7 @@ nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) { struct pipe_winsys *ws = pscreen->winsys; - ws->buffer_unmap(ws, ps->buffer); + ws->buffer_unmap(ws, nv50_surface_buffer(ps)); } void diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 5952378152..7af8398193 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -211,11 +211,9 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(level <= pt->last_level); ps = CALLOC_STRUCT(pipe_surface); - ps->refcount = 1; if (ps) { - assert(ps->refcount); + ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; @@ -225,7 +223,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; - + /* Because we are softpipe, anything that the state tracker * thought was going to be done with the GPU will actually get * done with the CPU. Let's adjust the flags to take that into @@ -274,8 +272,7 @@ softpipe_tex_surface_release(struct pipe_screen *screen, */ assert ((*s)->texture); if (--surf->refcount == 0) { - pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); + pipe_texture_reference(&surf->texture, NULL); FREE(surf); } *s = NULL; @@ -288,13 +285,16 @@ softpipe_surface_map( struct pipe_screen *screen, unsigned flags ) { ubyte *map; + struct softpipe_texture *spt; if (flags & ~surface->usage) { assert(0); return NULL; } - map = pipe_buffer_map( screen, surface->buffer, flags ); + assert(surface->texture); + spt = softpipe_texture(surface->texture); + map = pipe_buffer_map(screen, spt->buffer, flags); if (map == NULL) return NULL; @@ -318,7 +318,12 @@ static void softpipe_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - pipe_buffer_unmap( screen, surface->buffer ); + struct softpipe_texture *spt; + + assert(surface->texture); + spt = softpipe_texture(surface->texture); + + pipe_buffer_unmap( screen, spt->buffer ); } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 78b0efa46d..ab76009375 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -369,7 +369,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct pipe_surface *ps = tc->surface; int inuse = 0, pos; - if (ps && ps->buffer) { + if (ps) { /* caching a drawing surface */ for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index b23ccc1a3d..524f2d6194 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -397,7 +397,6 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_struct_begin("pipe_surface"); - trace_dump_member(ptr, state, buffer); trace_dump_member(format, state, format); trace_dump_member(uint, state, status); trace_dump_member(uint, state, clear_value); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 13fa9ba848..dd0dfac238 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -276,7 +276,6 @@ struct pipe_sampler_state */ struct pipe_surface { - struct pipe_buffer *buffer; /**< surface's buffer/memory */ enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ unsigned clear_value; /**< XXX may be temporary */ diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 4876339107..c6b0e3d8c5 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -41,6 +41,7 @@ #include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" #include "eglconfig.h" #include "eglconfigutil.h" @@ -292,7 +293,7 @@ display_surface(struct pipe_winsys *pws, assert(ximage->format); assert(ximage->bitmap_unit); - data = pws->buffer_map(pws, psurf->buffer, 0); + data = pws->buffer_map(pws, softpipe_texture(psurf->texture)->buffer, 0); /* update XImage's fields */ ximage->data = data; @@ -308,7 +309,7 @@ display_surface(struct pipe_winsys *pws, ximage->data = NULL; XDestroyImage(ximage); - pws->buffer_unmap(pws, psurf->buffer); + pws->buffer_unmap(pws, softpipe_texture(psurf->texture)->buffer); } diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 5b71e6dc38..8c2892d49b 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -22,11 +22,11 @@ INCLUDE_DIRS = \ -I$(TOP)/src/gallium/state_trackers/glx/xlib \ -I$(TOP)/src/gallium/auxiliary -DEFINES = \ +DEFINES += \ -DGALLIUM_SOFTPIPE \ - -DGALLIUM_CELL \ -DGALLIUM_TRACE \ -DGALLIUM_BRW +#-DGALLIUM_CELL will be defined by the config */ XLIB_WINSYS_SOURCES = \ xlib.c \ diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index 5344c502ef..51740a9af6 100644 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -42,12 +42,19 @@ #include "util/u_memory.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" +#include "i965simple/brw_context.h" + #include "xlib_brw_aub.h" #include "xlib_brw.h" #include "xlib.h" - +static struct pipe_buffer * +buffer_from_surface(struct pipe_surface *surface) +{ + struct brw_texture *texture = (struct brw_texture *)surface; + return texture->buffer; +} struct aub_buffer { char *data; @@ -226,7 +233,7 @@ aub_flush_frontbuffer( struct pipe_winsys *winsys, // struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); brw_aub_dump_bmp( global_winsys->aubfile, surface, - aub_bo(surface->buffer)->offset ); + aub_bo(buffer_from_surface(surface))->offset ); } @@ -449,7 +456,7 @@ xlib_brw_display_surface(struct xmesa_buffer *b, { brw_aub_dump_bmp( global_winsys->aubfile, surf, - aub_bo(surf->buffer)->offset ); + aub_bo(buffer_from_surface(surf))->offset ); } diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index bf69593c5c..c87564f4dc 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -51,6 +51,7 @@ #include "cell/ppu/cell_context.h" #include "cell/ppu/cell_screen.h" #include "cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_texture.h" /** @@ -164,7 +165,8 @@ static void xlib_cell_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf) { XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + struct xm_buffer *xm_buf = xm_buffer( + cell_texture(surf->texture)->buffer); const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE; uint x, y; diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 01d24584e2..586e1dfca5 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -45,6 +45,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" #include "xlib.h" @@ -58,7 +59,7 @@ struct xm_buffer boolean userBuffer; /** Is this a user-space buffer? */ void *data; void *mapped; - + XImage *tempImage; int shm; XShmSegmentInfo shminfo; @@ -225,11 +226,12 @@ xm_buffer_destroy(struct pipe_winsys *pws, * by the XMesaBuffer. */ static void -xlib_softpipe_display_surface(struct xmesa_buffer *b, +xlib_softpipe_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf) { XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + struct xm_buffer *xm_buf = xm_buffer( + softpipe_texture(surf->texture)->buffer); static boolean no_swap = 0; static boolean firsttime = 1; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index d18946de7d..963ac902d2 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -171,7 +171,6 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, surface_usage ); assert(strb->surface->texture); - assert(strb->surface->buffer); assert(strb->surface->format); assert(strb->surface->block.size); assert(strb->surface->block.width); diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 9e5c35072a..6a3455aaba 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -128,11 +128,11 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcData = (ubyte *) pipe_buffer_map(pipe->screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcData = (ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset; - dstData = (ubyte *) pipe_buffer_map(pipe->screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstData = (ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset; _mesa_generate_mipmap_level(target, datatype, comps, @@ -144,8 +144,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstData, dstSurf->stride); /* stride in bytes */ - pipe_buffer_unmap(pipe->screen, srcSurf->buffer); - pipe_buffer_unmap(pipe->screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); -- cgit v1.2.3 From 75f0b38d9ea4a7318b0d661712dda15e24707395 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Feb 2009 18:40:38 +1000 Subject: nv50: stop using nouveau_push.h, it's evil --- src/gallium/drivers/nv50/nv50_clear.c | 28 ++++++------ src/gallium/drivers/nv50/nv50_context.c | 2 +- src/gallium/drivers/nv50/nv50_context.h | 4 -- src/gallium/drivers/nv50/nv50_program.c | 23 ++++++---- src/gallium/drivers/nv50/nv50_query.c | 25 ++++++---- src/gallium/drivers/nv50/nv50_vbo.c | 81 +++++++++++++++++++-------------- 6 files changed, 92 insertions(+), 71 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index 6380f397ea..f9bc3b53ca 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -31,6 +31,8 @@ nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; struct pipe_scissor_state sc, s_sc = nv50->scissor; unsigned dirty = nv50->dirty; @@ -59,21 +61,21 @@ nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, switch (ps->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - BEGIN_RING(tesla, 0x0d80, 4); - OUT_RINGf (ubyte_to_float((clearValue >> 16) & 0xff)); - OUT_RINGf (ubyte_to_float((clearValue >> 8) & 0xff)); - OUT_RINGf (ubyte_to_float((clearValue >> 0) & 0xff)); - OUT_RINGf (ubyte_to_float((clearValue >> 24) & 0xff)); - BEGIN_RING(tesla, 0x19d0, 1); - OUT_RING (0x3c); + BEGIN_RING(chan, tesla, 0x0d80, 4); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 8) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 0) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff)); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x3c); break; case PIPE_FORMAT_Z24S8_UNORM: - BEGIN_RING(tesla, 0x0d90, 1); - OUT_RINGf ((float)(clearValue >> 8) * (1.0 / 16777215.0)); - BEGIN_RING(tesla, 0x0da0, 1); - OUT_RING (clearValue & 0xff); - BEGIN_RING(tesla, 0x19d0, 1); - OUT_RING (0x03); + BEGIN_RING(chan, tesla, 0x0d90, 1); + OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0)); + BEGIN_RING(chan, tesla, 0x0da0, 1); + OUT_RING (chan, clearValue & 0xff); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x03); break; default: pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 99776239d2..565a5da668 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -33,7 +33,7 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, { struct nv50_context *nv50 = (struct nv50_context *)pipe; - FIRE_RING(fence); + FIRE_RING(nv50->screen->nvws->channel); } static void diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 6c9e18429a..6a11572590 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_stateobj.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv50_screen *ctx = nv50->screen -#include "nouveau/nouveau_push.h" - #include "nv50_screen.h" #include "nv50_program.h" diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b902c8cf53..14c5d47e79 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1545,13 +1545,16 @@ static void nv50_program_upload_data(struct nv50_context *nv50, float *map, unsigned start, unsigned count) { + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(tesla, 0x00000f00, 1); - OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); - BEGIN_RING(tesla, 0x40000f04, nr); - OUT_RINGp (map, nr); + BEGIN_RING(chan, tesla, 0x00000f00, 1); + OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, map, nr); map += nr; start += nr; @@ -1598,6 +1601,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_winsys *ws = nv50->pipe.winsys; struct nv50_program_exec *e; struct nouveau_stateobj *so; @@ -1664,14 +1669,14 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) nr = MIN2(count, 2047); nr = MIN2(nvws->channel->pushbuf->remaining, nr); if (nvws->channel->pushbuf->remaining < (nr + 3)) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(tesla, 0x0f00, 1); - OUT_RING ((start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(tesla, 0x40000f04, nr); - OUT_RINGp (up + start, nr); + BEGIN_RING(chan, tesla, 0x0f00, 1); + OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, up + start, nr); start += nr; count -= nr; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 1b3a41340a..20745ceab8 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -71,12 +71,14 @@ static void nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(tesla, 0x1530, 1); - OUT_RING (1); - BEGIN_RING(tesla, 0x1514, 1); - OUT_RING (1); + BEGIN_RING(chan, tesla, 0x1530, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, 0x1514, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -85,14 +87,17 @@ static void nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(tesla, 0x1b00, 4); - OUT_RELOCh(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RING (0x00000000); - OUT_RING (0x0100f002); - FIRE_RING (NULL); + WAIT_RING (chan, 5); + BEGIN_RING(chan, tesla, 0x1b00, 4); + OUT_RELOCh(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0100f002); + FIRE_RING (chan); } static boolean diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index c482a4c241..0c970adb03 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -53,25 +53,27 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; nv50_state_validate(nv50); - BEGIN_RING(tesla, 0x142c, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x142c, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x1440, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x1334, 1); - OUT_RING (0); - - BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (nv50_prim(mode)); - BEGIN_RING(tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (start); - OUT_RING (count); - BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1334, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); pipe->flush(pipe, 0, NULL); return TRUE; @@ -81,11 +83,14 @@ static INLINE void nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned start, unsigned count) { + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + map += start; if (count & 1) { - BEGIN_RING(tesla, 0x15e8, 1); - OUT_RING (map[0]); + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); map++; count--; } @@ -94,9 +99,9 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); for (i = 0; i < nr; i += 2) - OUT_RING ((map[1] << 16) | map[0]); + OUT_RING (chan, (map[1] << 16) | map[0]); count -= nr; map += nr; @@ -107,11 +112,14 @@ static INLINE void nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned start, unsigned count) { + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + map += start; if (count & 1) { - BEGIN_RING(tesla, 0x15e8, 1); - OUT_RING (map[0]); + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); map++; count--; } @@ -120,9 +128,9 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); for (i = 0; i < nr; i += 2) - OUT_RING ((map[1] << 16) | map[0]); + OUT_RING (chan, (map[1] << 16) | map[0]); count -= nr; map += nr; @@ -133,13 +141,16 @@ static INLINE void nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, unsigned start, unsigned count) { + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + map += start; while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(tesla, 0x400015e8, nr); - OUT_RINGp (map, nr); + BEGIN_RING(chan, tesla, 0x400015e8, nr); + OUT_RINGp (chan, map, nr); count -= nr; map += nr; @@ -152,18 +163,20 @@ nv50_draw_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_winsys *ws = pipe->winsys; void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); nv50_state_validate(nv50); - BEGIN_RING(tesla, 0x142c, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x142c, 1); - OUT_RING (0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); - BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (nv50_prim(mode)); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); switch (indexSize) { case 1: nv50_draw_elements_inline_u08(nv50, map, start, count); @@ -177,8 +190,8 @@ nv50_draw_elements(struct pipe_context *pipe, default: assert(0); } - BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (0); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); pipe->flush(pipe, 0, NULL); return TRUE; -- cgit v1.2.3