From f9eafeca297497a94c438ea28ed59f3a45ed2566 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 11:17:55 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_draw.c nv30_draw.c is a stub. This patch makes both nv30 and nv40 use the nv40 swtnl path. Note that this doesn't actually work on nv30 because the vertex program is encoded in the nv40-only layout. However, swtnl was unimplemented before on nv30, so this is not a regression. Furthermore, a patch to fix this is available near the end of the patchset. --- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 8 + src/gallium/drivers/nvfx/nvfx_draw.c | 363 +++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 6 +- 4 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/nvfx/nvfx_draw.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index f8c10a249b..a23d024f11 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -5,6 +5,7 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ + nvfx_draw.c \ nvfx_fragprog.c \ nvfx_miptree.c \ nvfx_query.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 0a7a0f1252..75008f8ddd 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -190,6 +190,7 @@ extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_viewport; +extern struct nvfx_state_entry nvfx_state_vtxfmt; extern struct nvfx_state_entry nvfx_state_zsa; extern void nvfx_init_query_functions(struct nvfx_context *nvfx); @@ -199,6 +200,13 @@ extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nvfx_draw.c */ +extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); +extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + /* nvfx_fragprog.c */ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c new file mode 100644 index 0000000000..8700e14329 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -0,0 +1,363 @@ +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nvfx_context.h" +#define NVFX_SHADER_NO_FUCKEDNESS +#include "nv30/nv30_shader.h" +#include "nv40/nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nvfx_render_stage { + struct draw_stage stage; + struct nvfx_context *nvfx; + unsigned prim; +}; + +static INLINE struct nvfx_render_stage * +nvfx_render_stage(struct draw_stage *stage) +{ + return (struct nvfx_render_stage *)stage; +} + +static INLINE void +nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { + unsigned idx = nvfx->swtnl.draw[i]; + unsigned hw = nvfx->swtnl.hw[i]; + + switch (nvfx->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nvfx_render_vertex(nvfx, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); +} + +static void +nvfx_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); +} + +static void +nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); +} + +static void +nvfx_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(draw); + struct nvfx_context *nvfx = rs->nvfx; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nvfx_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static INLINE void +emit_mov(struct nvfx_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nvfx_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nvfx_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nvfx_vertex_program * +create_drawvp(struct nvfx_context *nvfx) +{ + struct nvfx_vertex_program *vp = CALLOC_STRUCT(nvfx_vertex_program); + unsigned i; + + // nv30 support comes in a later patch + assert(nvfx->is_nv4x); + + emit_mov(vp, NVFX_VP(INST_DEST_POS), 0, ~0, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_COL0), 3, 0, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_COL1), 4, 1, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_BFC0), 3, 2, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_BFC1), 4, 3, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_FOGC), 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NVFX_VP(INST_DEST_TC(i)), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + +struct draw_stage * +nvfx_draw_render_stage(struct nvfx_context *nvfx) +{ + struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); + + if (!nvfx->swtnl.vertprog) + nvfx->swtnl.vertprog = create_drawvp(nvfx); + + render->nvfx = nvfx; + render->stage.draw = nvfx->draw; + render->stage.point = nvfx_render_point; + render->stage.line = nvfx_render_line; + render->stage.tri = nvfx_render_tri; + render->stage.flush = nvfx_render_flush; + render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter; + render->stage.destroy = nvfx_render_destroy; + + return &render->stage; +} + +void +nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_screen *pscreen = pipe->screen; + unsigned i; + void *map; + + if (!nvfx_state_validate_swtnl(nvfx)) + return; + nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); + nvfx_state_emit(nvfx); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) { + map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nvfx->draw, i, map); + } + + if (idxbuf) { + map = pipe_buffer_map(pscreen, idxbuf, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); + } + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = pipe_buffer_map(pscreen, + nvfx->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, + map, nr); + } + + draw_arrays(nvfx->draw, mode, start, count); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) + pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); + + if (idxbuf) + pipe_buffer_unmap(pscreen, idxbuf); + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) + pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nvfx->draw); + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); + unsigned a = nvfx->swtnl.nr_attribs++; + + nvfx->swtnl.hw[a] = hw; + nvfx->swtnl.emit[a] = emit; + nvfx->swtnl.draw[a] = draw_out; +} + +static boolean +nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nvfx->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nvfx, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nvfx, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nvfx_state_entry nvfx_state_vtxfmt = { + .validate = nvfx_state_vtxfmt_validate, + .dirty = { + .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index d6fbc5d8dc..9961dacce4 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -3,6 +3,10 @@ #include "nvfx_state.h" #include "draw/draw_context.h" +/* temporary, will be removed in next patch */ +#define nv30_state_vtxfmt nvfx_state_vtxfmt +#define nv40_state_vtxfmt nvfx_state_vtxfmt + #define RENDER_STATES(name, nvxx, vbo) \ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_framebuffer, \ @@ -22,7 +26,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ } RENDER_STATES(nv30, nv30, vbo); -RENDER_STATES(nv30_swtnl, nv30, vbo); /* TODO: replace with vtxfmt once draw is unified */ +RENDER_STATES(nv30_swtnl, nv30, vtxfmt); RENDER_STATES(nv40, nv40, vbo); RENDER_STATES(nv40_swtnl, nv40, vtxfmt); -- cgit v1.2.3