summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c207
1 files changed, 162 insertions, 45 deletions
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 3238629461..b9cf0754b7 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
static boolean
nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
+#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
+
static INLINE unsigned
nv50_prim(unsigned mode)
{
@@ -171,7 +173,7 @@ nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
struct pipe_buffer *buf = nscreen->strm_vbuf[i];
struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
uint8_t *src;
- unsigned size = MAX2(vb->buffer->size, 4096);
+ unsigned size = align(vb->buffer->size, 4096);
if (buf && buf->size < size)
pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
@@ -215,6 +217,74 @@ nv50_upload_user_vbufs(struct nv50_context *nv50)
}
}
+static void
+nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ float v[4];
+
+ util_format_read_4f(nv50->vtxelt[i].src_format,
+ v, 0, data, 0, 0, 0, 1, 1);
+
+ switch (nv50->vtxelt[i].nr_components) {
+ case 4:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
+ OUT_RING (chan, v[0]);
+ OUT_RING (chan, v[1]);
+ OUT_RING (chan, v[2]);
+ OUT_RING (chan, v[3]);
+ break;
+ case 3:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 4);
+ OUT_RING (chan, v[0]);
+ OUT_RING (chan, v[1]);
+ OUT_RING (chan, v[2]);
+ break;
+ case 2:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 4);
+ OUT_RING (chan, v[0]);
+ OUT_RING (chan, v[1]);
+ break;
+ case 1:
+ BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 4);
+ OUT_RING (chan, v[0]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static unsigned
+init_per_instance_arrays_immd(struct nv50_context *nv50,
+ unsigned startInstance,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_bo *bo;
+ unsigned i, b, count = 0;
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ ++count;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+
+ pos[i] = nv50->vtxelt[i].src_offset +
+ nv50->vtxbuf[b].buffer_offset +
+ startInstance * nv50->vtxbuf[b].stride;
+ step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+ if (!bo->map)
+ nouveau_bo_map(bo, NOUVEAU_BO_RD);
+
+ nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ }
+
+ return count;
+}
+
static unsigned
init_per_instance_arrays(struct nv50_context *nv50,
unsigned startInstance,
@@ -227,6 +297,10 @@ init_per_instance_arrays(struct nv50_context *nv50,
unsigned i, b, count = 0;
const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ if (nv50->vbo_fifo)
+ return init_per_instance_arrays_immd(nv50, startInstance,
+ pos, step);
+
so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
for (i = 0; i < nv50->vtxelt_nr; ++i) {
@@ -248,11 +322,11 @@ init_per_instance_arrays(struct nv50_context *nv50,
bo = nouveau_bo(nv50->vtxbuf[b].buffer);
so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
}
- if (count) {
+ if (count && startInstance) {
so_ref (so, &nv50->state.instbuf); /* for flush notify */
so_emit(chan, nv50->state.instbuf);
}
@@ -262,6 +336,28 @@ init_per_instance_arrays(struct nv50_context *nv50,
}
static void
+step_per_instance_arrays_immd(struct nv50_context *nv50,
+ unsigned pos[16], unsigned step[16])
+{
+ struct nouveau_bo *bo;
+ unsigned i, b;
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ if (!nv50->vtxelt[i].instance_divisor)
+ continue;
+ if (++step[i] != nv50->vtxelt[i].instance_divisor)
+ continue;
+ b = nv50->vtxelt[i].vertex_buffer_index;
+ bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+ step[i] = 0;
+ pos[i] += nv50->vtxbuf[b].stride;
+
+ nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ }
+}
+
+static void
step_per_instance_arrays(struct nv50_context *nv50,
unsigned pos[16], unsigned step[16])
{
@@ -272,6 +368,11 @@ step_per_instance_arrays(struct nv50_context *nv50,
unsigned i, b;
const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ if (nv50->vbo_fifo) {
+ step_per_instance_arrays_immd(nv50, pos, step);
+ return;
+ }
+
so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
for (i = 0; i < nv50->vtxelt_nr; ++i) {
@@ -287,8 +388,8 @@ step_per_instance_arrays(struct nv50_context *nv50,
bo = nouveau_bo(nv50->vtxbuf[b].buffer);
so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
}
so_ref (so, &nv50->state.instbuf); /* for flush notify */
@@ -297,6 +398,16 @@ step_per_instance_arrays(struct nv50_context *nv50,
so_emit(chan, nv50->state.instbuf);
}
+static INLINE void
+nv50_unmap_vbufs(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
+ nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+}
+
void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count,
@@ -308,7 +419,8 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
unsigned i, nz_divisors;
unsigned step[16], pos[16];
- nv50_upload_user_vbufs(nv50);
+ if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
+ nv50_upload_user_vbufs(nv50);
nv50_state_validate(nv50);
@@ -320,9 +432,14 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(mode));
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
+
+ if (nv50->vbo_fifo)
+ nv50_push_arrays(nv50, start, count);
+ else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
@@ -332,12 +449,18 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(mode) | (1 << 28));
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
+
+ if (nv50->vbo_fifo)
+ nv50_push_arrays(nv50, start, count);
+ else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
}
+ nv50_unmap_vbufs(nv50);
so_ref(NULL, &nv50->state.instbuf);
}
@@ -372,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
+ nv50_unmap_vbufs(nv50);
+
/* XXX: not sure what to do if ret != TRUE: flush and retry?
*/
assert(ret);
@@ -400,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -433,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
@@ -458,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
OUT_RINGp (chan, map, nr);
count -= nr;
@@ -502,7 +627,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
- nv50_upload_user_vbufs(nv50);
+ if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
+ nv50_upload_user_vbufs(nv50);
nv50_state_validate(nv50);
@@ -513,7 +639,7 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
OUT_RING (chan, startInstance);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode) | (1 << 28));
+ OUT_RING (chan, nv50_prim(mode));
nv50_draw_elements_inline(nv50, map, indexSize, start, count);
@@ -532,6 +658,7 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
}
+ nv50_unmap_vbufs(nv50);
so_ref(NULL, &nv50->state.instbuf);
}
@@ -564,6 +691,8 @@ nv50_draw_elements(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
+ nv50_unmap_vbufs(nv50);
+
pipe_buffer_unmap(pscreen, indexBuffer);
}
@@ -644,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
- if (nv50->vertprog->cfg.edgeflag_in < 16)
+ if (NV50_USING_LOATHED_EDGEFLAG(nv50))
nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
@@ -672,14 +801,16 @@ nv50_vbo_validate(struct nv50_context *nv50)
nv50->vbo_fifo &= ~(1 << i);
continue;
}
- so_data(vtxfmt, hw | i);
if (nv50->vbo_fifo) {
+ so_data (vtxfmt, hw |
+ (ve->instance_divisor ? (1 << 4) : i));
so_method(vtxbuf, tesla,
NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
continue;
}
+ so_data(vtxfmt, hw | i);
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
so_data (vtxbuf, 0x20000000 |
@@ -721,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *);
struct nv50_vbo_emitctx
{
pfn_push push[16];
- void *map[16];
+ uint8_t *map[16];
unsigned stride[16];
unsigned nr_ve;
unsigned vtx_dwords;
@@ -759,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50)
for (i = 0; i < nv50->vtxbuf_nr; ++i) {
struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- unsigned size, delta;
+ unsigned size = vb->stride * (vb->max_index + 1) + 16;
if (nouveau_bo(vb->buffer)->map)
continue;
- size = vb->stride * (vb->max_index + 1);
- delta = vb->buffer_offset;
-
+ size = vb->stride * (vb->max_index + 1) + 16;
+ size = MIN2(size, vb->buffer->size);
if (!size)
- size = vb->buffer->size - vb->buffer_offset;
+ size = vb->buffer->size;
if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
- delta, size, NOUVEAU_BO_RD))
+ 0, size, NOUVEAU_BO_RD))
break;
}
@@ -782,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50)
return FALSE;
}
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
- if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-}
-
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
@@ -886,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
ve = &nv50->vtxelt[i];
vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- if (!(nv50->vbo_fifo & (1 << i)))
+ if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
continue;
n = emit->nr_ve++;
emit->stride[n] = vb->stride;
- emit->map[n] = nouveau_bo(vb->buffer)->map +
+ emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
+ vb->buffer_offset +
(start * vb->stride + ve->src_offset);
desc = util_format_description(ve->src_format);
@@ -981,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx_next(chan, &emit);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -1008,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -1035,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}
@@ -1062,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
set_edgeflag(chan, tesla, &emit, *map);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
for (i = 0; i < nr; ++i)
emit_vtx(chan, &emit, *map++);
count -= nr;
}
- nv50_unmap_vbufs(nv50);
return TRUE;
}