From 716c1cd2ecbc1e86c0fd747c9fa9e095ded5fd5d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 1 Jun 2008 23:10:31 +1000 Subject: nv50: use "real" constbufs for shaders + tcb uploads --- src/gallium/drivers/nv50/nv50_context.h | 14 ++++++- src/gallium/drivers/nv50/nv50_program.c | 27 +++++------- src/gallium/drivers/nv50/nv50_screen.c | 57 +++++++++++++++----------- src/gallium/drivers/nv50/nv50_screen.h | 3 ++ src/gallium/drivers/nv50/nv50_state.c | 4 +- src/gallium/drivers/nv50/nv50_state.h | 1 - src/gallium/drivers/nv50/nv50_state_validate.c | 29 +++++++++++++ 7 files changed, 90 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index d4d716b94b..c4a8a4c064 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -23,6 +23,14 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 + #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) #define NV50_NEW_BLEND_COLOUR (1 << 2) @@ -32,8 +40,10 @@ #define NV50_NEW_RASTERIZER (1 << 6) #define NV50_NEW_FRAMEBUFFER (1 << 7) #define NV50_NEW_VERTPROG (1 << 8) -#define NV50_NEW_FRAGPROG (1 << 9) -#define NV50_NEW_ARRAYS (1 << 10) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) struct nv50_blend_stateobj { struct pipe_blend_state pipe; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 30953b7d8a..0a43646923 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -179,6 +179,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src1) { if (src1->type == P_CONST || src1->type == P_IMMD) { + if (src1->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x00800000; /* src1 is const */ /*XXX: does src1 come from "src2" now? */ alloc_reg(pc, src1); @@ -196,6 +200,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src2) { if (src2->type == P_CONST || src2->type == P_IMMD) { + if (src2->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x01000000; /* src2 is const */ inst[1] |= (src2->hw << 14); } else { @@ -526,7 +534,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = pc->param_nr * 4; + int rid = 0; pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -581,7 +589,6 @@ nv50_program_tx(struct nv50_program *p) } } - p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -654,23 +661,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) memcpy(map, p->insns, p->insns_nr * 4); ws->buffer_unmap(ws, p->buffer); - if (p->param_nr) { - float *cb; - - cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < p->param_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING (i << 8); - OUT_RING (fui(cb[i])); - } - ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]); - } - - for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((p->param_nr + i) << 8); + OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); OUT_RING (fui(p->immd[i])); } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index fc3eeed913..6c0810a9cf 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -203,14 +203,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) return NULL; } - /* Static constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { - NOUVEAU_ERR("Error initialising constant buffer\n"); - nv50_screen_destroy(&screen->pipe); - return NULL; - } - /* Static tesla init */ so = so_new(256, 20); @@ -245,37 +237,56 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00001000); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * blocks. At some point we *may* want to go the NVIDIA way of doing + * things? + */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00014000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00024000); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00034000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00040100); + so_data (so, 0x00000800); + + /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { so_method(so, screen->tesla, 0x1080 + (i * 8), 2); so_data (so, 0x000000ff); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index d63dd48508..5acb5003ba 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -15,6 +15,9 @@ struct nv50_screen { struct pipe_buffer *constbuf; struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index fd10a38378..ba3d04cede 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -406,11 +406,11 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (shader == PIPE_SHADER_VERTEX) { nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; } } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index be0c75ad6e..9e3876871b 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -20,7 +20,6 @@ struct nv50_program { struct pipe_buffer *buffer; - unsigned param_nr; float *immd; unsigned immd_nr; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4a548378b7..05395c6df7 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -168,6 +168,35 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_VERTPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PVP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_FRAGPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PFP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3