nvfx: rewrite draw code and buffer code

This is a full rewrite of the drawing and buffer management logic.

It offers a lot of improvements:
1. A copy of buffers is now always kept in system memory. This is necessary to allow software processing of them, which is either required or improves performance in many cases.
2. Support for pushing vertices on the FIFO, with index lookup if necessary.
3. "Smart" draw code that tries to intelligently choose the cheapest way to draw something: whether to use inline vertices or hardware vertex buffers, and whether to use hardware index buffers.
4. Support for all vertex formats supported by the hardware.
5. Usage of translate to push vertices, supporting all formats that are sensible to use as vertex formats.
6. Support for base vertex.
7. Usage of Ben Skeggs' primitive splitter, originally written for nv50, allowing correct splitting of line loops, triangle fans, etc.
8. Support for instancing.
9. Precomputation using the vertex elements CSO.

Thanks to Ben Skeggs for his primitive splitter, originally written for nv50.

Thanks to Christoph Bumiller for his nv50 push code, which was the basis of this work, even though I changed his code dramatically, in particular to replace his ad-hoc vertex data emitter with translate.

The changes could also go into nv50, but there are substantial differences due to the additional nv50 hardware features.
author: Luca Barbieri <luca@luca-barbieri.com> 2010-08-07 05:39:18 +0200
committer: Luca Barbieri <luca@luca-barbieri.com> 2010-08-21 20:42:14 +0200
commit: 8eb0fc430a8c1687627156a06faf5762144022f3 (patch)
tree: faddeeecd24e26c1d92d9aeeeb5e4ba0dd276e96 /src/gallium/drivers/nvfx/nvfx_vbo.c
parent: 73b7c6fb336ad3e717f8e961f4e2df761e94cd2f (diff)
1 files changed, 477 insertions, 539 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 4aa3793842..a6cd125635 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "translate/translate.h"
 
 #include "nvfx_context.h"
 #include "nvfx_state.h"
@@ -10,646 +11,583 @@
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_class.h"
 #include "nouveau/nouveau_pushbuf.h"
-#include "nouveau/nouveau_util.h"
 
-static INLINE int
-nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+static inline unsigned
+util_guess_unique_indices_count(unsigned mode, unsigned indices)
 {
-	switch (pipe) {
-	case PIPE_FORMAT_R32_FLOAT:
-	case PIPE_FORMAT_R32G32_FLOAT:
-	case PIPE_FORMAT_R32G32B32_FLOAT:
-	case PIPE_FORMAT_R32G32B32A32_FLOAT:
-		*fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
-		break;
-	case PIPE_FORMAT_R16_FLOAT:
-	case PIPE_FORMAT_R16G16_FLOAT:
-	case PIPE_FORMAT_R16G16B16_FLOAT:
-	case PIPE_FORMAT_R16G16B16A16_FLOAT:
-		*fmt = NV34TCL_VTXFMT_TYPE_HALF;
-		break;
-	case PIPE_FORMAT_R8_UNORM:
-	case PIPE_FORMAT_R8G8_UNORM:
-	case PIPE_FORMAT_R8G8B8_UNORM:
-	case PIPE_FORMAT_R8G8B8A8_UNORM:
-		*fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
-		break;
-	case PIPE_FORMAT_R16_SSCALED:
-	case PIPE_FORMAT_R16G16_SSCALED:
-	case PIPE_FORMAT_R16G16B16_SSCALED:
-	case PIPE_FORMAT_R16G16B16A16_SSCALED:
-		*fmt = NV34TCL_VTXFMT_TYPE_USHORT;
-		break;
-	default:
-		NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-		return 1;
+	/* Euler's formula gives V =
+	 * = E - F + 2 =
+	 * = F * (polygon_edges / 2 - 1) + 2 =
+	 * =  F * (polygon_edges - 2) / 2 + 2 =
+	 * =  indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
+	 * =  indices * (1 / 2 - 1 / polygon_edges) + 2
+	 */
+	switch(mode)
+	{
+	case PIPE_PRIM_LINES:
+		return indices >> 1;
+	case PIPE_PRIM_TRIANGLES:
+	{
+		// avoid an expensive division by 3 using the multiplicative inverse mod 2^32
+		unsigned q;
+		unsigned inv3 = 2863311531;
+		indices >>= 1;
+		q = indices * inv3;
+		if(unlikely(q >= indices))
+		{
+			q += inv3;
+			if(q >= indices)
+				q += inv3;
+		}
+		return indices + 2;
+		//return indices / 6 + 2;
 	}
-
-	switch (pipe) {
-	case PIPE_FORMAT_R8_UNORM:
-	case PIPE_FORMAT_R32_FLOAT:
-	case PIPE_FORMAT_R16_FLOAT:
-	case PIPE_FORMAT_R16_SSCALED:
-		*ncomp = 1;
-		break;
-	case PIPE_FORMAT_R8G8_UNORM:
-	case PIPE_FORMAT_R32G32_FLOAT:
-	case PIPE_FORMAT_R16G16_FLOAT:
-	case PIPE_FORMAT_R16G16_SSCALED:
-		*ncomp = 2;
-		break;
-	case PIPE_FORMAT_R8G8B8_UNORM:
-	case PIPE_FORMAT_R32G32B32_FLOAT:
-	case PIPE_FORMAT_R16G16B16_FLOAT:
-	case PIPE_FORMAT_R16G16B16_SSCALED:
-		*ncomp = 3;
-		break;
-	case PIPE_FORMAT_R8G8B8A8_UNORM:
-	case PIPE_FORMAT_R32G32B32A32_FLOAT:
-	case PIPE_FORMAT_R16G16B16A16_FLOAT:
-	case PIPE_FORMAT_R16G16B16A16_SSCALED:
-		*ncomp = 4;
-		break;
+	// guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
+	case PIPE_PRIM_QUADS:
+		return (indices >> 1) + 2;
+	//	return (indices >> 2) + 2; // if it is a closed mesh
 	default:
-		NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-		return 1;
+		return indices;
 	}
-
-	return 0;
 }
 
-static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
-		    unsigned ib_size)
+static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
-	unsigned type;
-
-	if (!ib) {
-		nvfx->idxbuf_buffer = NULL;
-		nvfx->idxbuf_format = 0xdeadbeef;
-		return FALSE;
+	struct nvfx_context* nvfx = nvfx_context(pipe);
+	unsigned hardware_cost = 0;
+	unsigned inline_cost = 0;
+	unsigned unique_vertices;
+	unsigned upload_mode;
+	if (info->indexed)
+		unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
+	else
+		unique_vertices = info->count;
+
+	/* Here we try to figure out if we are better off writing vertex data directly on the FIFO,
+	 * or creating hardware buffer objects and pointing the hardware to them.
+	 *
+	 * This is done by computing the total memcpy cost of each option, ignoring uploads
+	 * if we think that the buffer is static and thus the upload cost will be amortized over
+	 * future draw calls.
+	 *
+	 * For instance, if everything looks static, we will always create buffer objects, while if
+	 * everything is a user buffer and we are not doing indexed drawing, we never do.
+	 *
+	 * Other interesting cases are a small user vertex buffer with a huge user index buffer,
+	 * where we will upload the vertex buffer so that we can use hardware index lookup, and
+	 * the opposite case, where we instead do index lookup in software to avoid uploading
+	 * a huge amount of vertex data that is not going to be used.
+	 *
+	 * Otherwise, we generally move a buffer to the GPU after it has been pushed
+	 * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having
+	 * been updated with a transfer (or just the buffer having been destroyed).
+	 *
+	 * There is no special handling for user buffers, since applications can use
+	 * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this
+	 * by the way.
+	 *
+	 * Note that currently we don't support only putting some data on the FIFO, and
+	 * some on vertex buffers (constant and instanced data is independent from this).
+	 *
+	 * nVidia doesn't seem to do this either, even though it should be at least
+	 * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
+	 */
+
+	for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+	{
+		struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+		buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
+		if (!nvfx_buffer_seems_static(buffer))
+		{
+			hardware_cost += buffer->dirty_end - buffer->dirty_begin;
+			if (!buffer->base.bo)
+				hardware_cost += nvfx->screen->buffer_allocation_cost;
+		}
+		inline_cost += vbi->per_vertex_size * info->count;
 	}
 
-	if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
-		return FALSE;
+	float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+	boolean prefer_hardware_indices = FALSE;
+	unsigned index_inline_cost = 0;
+	unsigned index_hardware_cost = 0;
 
-	switch (ib_size) {
-	case 2:
-		type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
-		break;
-	case 4:
-		type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
-		break;
-	default:
-		return FALSE;
-	}
+	if (info->indexed)
+	{
+		index_inline_cost = nvfx->idxbuf.index_size * info->count;
+		if (nvfx->screen->index_buffer_reloc_flags
+			&& (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
+			&& !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
+		{
+			struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
+			buffer->bytes_to_draw_until_static -= index_inline_cost;
 
-	if (ib != nvfx->idxbuf_buffer ||
-	    type != nvfx->idxbuf_format) {
-		nvfx->dirty |= NVFX_NEW_ARRAYS;
-		nvfx->idxbuf_buffer = ib;
-		nvfx->idxbuf_format = type;
-	}
+			prefer_hardware_indices = TRUE;
 
-	return TRUE;
-}
+			if (!nvfx_buffer_seems_static(buffer))
+			{
+				index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
+				if (!buffer->base.bo)
+					index_hardware_cost += nvfx->screen->buffer_allocation_cost;
+			}
 
-// type must be floating point
-static inline void
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
-		       int attrib, struct pipe_vertex_element *ve,
-		       struct pipe_vertex_buffer *vb, unsigned ncomp)
-{
-	struct pipe_transfer *transfer;
-	struct nouveau_channel* chan = nvfx->screen->base.channel;
-	void *map;
-	float *v;
-
-	map  = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
-	map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
-
-	v = map;
-
-	switch (ncomp) {
-	case 4:
-		OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
-		OUT_RING(chan, fui(v[0]));
-		OUT_RING(chan, fui(v[1]));
-		OUT_RING(chan,  fui(v[2]));
-		OUT_RING(chan,  fui(v[3]));
-		break;
-	case 3:
-		OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
-		OUT_RING(chan,  fui(v[0]));
-		OUT_RING(chan,  fui(v[1]));
-		OUT_RING(chan,  fui(v[2]));
-		break;
-	case 2:
-		OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
-		OUT_RING(chan,  fui(v[0]));
-		OUT_RING(chan,  fui(v[1]));
-		break;
-	case 1:
-		OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
-		OUT_RING(chan,  fui(v[0]));
-		break;
+			if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
+			{
+				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
+			}
+			else
+			{
+				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
+				prefer_hardware_indices = TRUE;
+			}
+		}
 	}
 
-	pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+	/* let's finally figure out which of the 3 paths we want to take */
+	if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
+		upload_mode = 1 + prefer_hardware_indices;
+	else
+		upload_mode = 0;
+
+#ifdef DEBUG
+        if (unlikely(nvfx->screen->trace_draw))
+          {
+                  fprintf(stderr, "DRAW");
+                  if (info->indexed)
+                  {
+                          fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
+                          if (info->index_bias)
+                                  fprintf(stderr, " biased %u", info->index_bias);
+                          fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
+                  }
+                  if (info->instance_count > 1)
+                          fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed);
+                  fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
+                  if (!upload_mode)
+                          fprintf(stderr, " -> inline vertex data");
+                  else if (upload_mode == 2 || !info->indexed)
+                          fprintf(stderr, " -> buffer range");
+                  else
+                          fprintf(stderr, " -> inline indices");
+                  fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
+                  for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+                  {
+                          struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                          struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                          struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+                          if (i)
+                                  fprintf(stderr, ", ");
+                          fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
+                  }
+                  fprintf(stderr, ">\n");
+          }
+#endif
+
+	return upload_mode;
 }
 
-static void
-nvfx_draw_arrays(struct pipe_context *pipe,
-		 unsigned mode, unsigned start, unsigned count)
+void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
 	struct nvfx_context *nvfx = nvfx_context(pipe);
-	struct nvfx_screen *screen = nvfx->screen;
-	struct nouveau_channel *chan = screen->base.channel;
-	unsigned restart = 0;
-
-	nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
-	if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-		nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
-                                           mode, start, count);
-                return;
-	}
+	unsigned upload_mode = 0;
 
-	while (count) {
-		unsigned vc, nr, avail;
+	if (!nvfx->vtxelt->needs_translate)
+		upload_mode = nvfx_decide_upload_mode(pipe, info);
 
-		nvfx_state_emit(nvfx);
+	nvfx->use_index_buffer = upload_mode > 1;
 
-		avail = AVAIL_RING(chan);
-		avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+	if ((upload_mode > 0) != nvfx->use_vertex_buffers)
+	{
+		nvfx->use_vertex_buffers = (upload_mode > 0);
+		nvfx->dirty |= NVFX_NEW_ARRAYS;
+		nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+	}
 
-		vc = nouveau_vbuf_split(avail, 6, 256,
-					mode, start, count, &restart);
-		if (!vc) {
-			FIRE_RING(chan);
-			continue;
+	if (upload_mode > 0)
+	{
+		for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+		{
+			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+			nvfx_buffer_upload(nvfx_buffer(vb->buffer));
 		}
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, nvgl_primitive(mode));
+		if (upload_mode > 1)
+		{
+			nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));
 
-		nr = (vc & 0xff);
-		if (nr) {
-			OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
-			OUT_RING  (chan, ((nr - 1) << 24) | start);
-			start += nr;
+			if (unlikely(info->index_bias != nvfx->base_vertex))
+			{
+				nvfx->base_vertex = info->index_bias;
+				nvfx->dirty |= NVFX_NEW_ARRAYS;
+			}
 		}
-
-		nr = vc >> 8;
-		while (nr) {
-			unsigned push = nr > 2047 ? 2047 : nr;
-
-			nr -= push;
-
-			OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
-			while (push--) {
-				OUT_RING(chan, ((0x100 - 1) << 24) | start);
-				start += 0x100;
+		else
+		{
+			if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
+			{
+				nvfx->base_vertex = 0;
+				nvfx->dirty |= NVFX_NEW_ARRAYS;
 			}
 		}
-
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, 0);
-
-		count -= vc;
-		start = restart;
 	}
 
-	pipe->flush(pipe, 0, NULL);
+	if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
+		nvfx_draw_vbo_swtnl(pipe, info);
+	else
+		nvfx_push_vbo(pipe, info);
 }
 
-static INLINE void
-nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
-		       unsigned mode, unsigned start, unsigned count)
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
 {
-	struct nvfx_screen *screen = nvfx->screen;
-	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_channel* chan = nvfx->screen->base.channel;
+	int i;
+	int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
 
-	while (count) {
-		uint8_t *elts = (uint8_t *)ib + start;
-		unsigned vc, push, restart = 0, avail;
+	if (!elements)
+		return TRUE;
 
-		nvfx_state_emit(nvfx);
+	MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+	for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
+	{
+		struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
+		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+		float v[4];
+		ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+		nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+	}
 
-		avail = AVAIL_RING(chan);
-		avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
 
-		vc = nouveau_vbuf_split(avail, 6, 2,
-					mode, start, count, &restart);
-		if (vc == 0) {
-			FIRE_RING(chan);
-			continue;
-		}
-		count -= vc;
+	OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+	if(nvfx->use_vertex_buffers)
+	{
+		unsigned idx = 0;
+		for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+			struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, nvgl_primitive(mode));
+			if(idx != ve->idx)
+			{
+				assert(idx < ve->idx);
+				OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
+				idx = ve->idx;
+			}
 
-		if (vc & 1) {
-			OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-			OUT_RING  (chan, elts[0]);
-			elts++; vc--;
+			OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
+			++idx;
 		}
+		if(idx != nvfx->vtxelt->num_elements)
+			OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
+	}
+	else
+		OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);
 
-		while (vc) {
-			unsigned i;
-
-			push = MIN2(vc, 2047 * 2);
-
-			OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-			for (i = 0; i < push; i+=2)
-				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+	for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
+		OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
 
-			vc -= push;
-			elts += push;
+	if(nvfx->is_nv4x) {
+		unsigned i;
+		/* seems to be some kind of cache flushing */
+		for(i = 0; i < 3; ++i) {
+			OUT_RING(chan, RING_3D(0x1718, 1));
+			OUT_RING(chan, 0);
 		}
-
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, 0);
-
-		start = restart;
 	}
-}
-
-static INLINE void
-nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
-		       unsigned mode, unsigned start, unsigned count)
-{
-	struct nvfx_screen *screen = nvfx->screen;
-	struct nouveau_channel *chan = screen->base.channel;
-
-	while (count) {
-		uint16_t *elts = (uint16_t *)ib + start;
-		unsigned vc, push, restart = 0, avail;
 
-		nvfx_state_emit(nvfx);
-
-		avail = AVAIL_RING(chan);
-		avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-		vc = nouveau_vbuf_split(avail, 6, 2,
-					mode, start, count, &restart);
-		if (vc == 0) {
-			FIRE_RING(chan);
-			continue;
-		}
-		count -= vc;
+	OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+	if(nvfx->use_vertex_buffers)
+	{
+		unsigned idx = 0;
+		for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+			struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+			struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, nvgl_primitive(mode));
+			for(; idx < ve->idx; ++idx)
+				OUT_RING(chan, 0);
 
-		if (vc & 1) {
-			OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-			OUT_RING  (chan, elts[0]);
-			elts++; vc--;
+			OUT_RELOC(chan, bo,
+					vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+					vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+					0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+			++idx;
 		}
 
-		while (vc) {
-			unsigned i;
-
-			push = MIN2(vc, 2047 * 2);
-
-			OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-			for (i = 0; i < push; i+=2)
-				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
-
-			vc -= push;
-			elts += push;
-		}
+		for(; idx < elements; ++idx)
+			OUT_RING(chan, 0);
+	}
+	else
+	{
+		for (i = 0; i < elements; i++)
+			OUT_RING(chan, 0);
+	}
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, 0);
+	OUT_RING(chan, RING_3D(0x1710, 1));
+	OUT_RING(chan, 0);
 
-		start = restart;
-	}
+	nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+	return TRUE;
 }
 
-static INLINE void
-nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
-		       unsigned mode, unsigned start, unsigned count)
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
 {
-	struct nvfx_screen *screen = nvfx->screen;
-	struct nouveau_channel *chan = screen->base.channel;
-
-	while (count) {
-		uint32_t *elts = (uint32_t *)ib + start;
-		unsigned vc, push, restart = 0, avail;
-
-		nvfx_state_emit(nvfx);
-
-		avail = AVAIL_RING(chan);
-		avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-		vc = nouveau_vbuf_split(avail, 5, 1,
-					mode, start, count, &restart);
-		if (vc == 0) {
-			FIRE_RING(chan);
-			continue;
-		}
-		count -= vc;
-
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, nvgl_primitive(mode));
-
-		while (vc) {
-			push = MIN2(vc, 2047);
-
-			OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
-			OUT_RINGp    (chan, elts, push);
-
-			vc -= push;
-			elts += push;
-		}
+        if(!nvfx->use_vertex_buffers)
+                return;
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, 0);
+	struct nouveau_channel* chan = nvfx->screen->base.channel;
+	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+	int i;
 
-		start = restart;
+	MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+
+                OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
+				vb_flags, 0, 0);
+                OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+				vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+				0, NV34TCL_VTXBUF_ADDRESS_DMA1);
 	}
 }
 
 static void
-nvfx_draw_elements_inline(struct pipe_context *pipe,
-			  struct pipe_resource *ib,
-			  unsigned ib_size, int ib_bias,
-			  unsigned mode, unsigned start, unsigned count)
+nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
 {
-	struct nvfx_context *nvfx = nvfx_context(pipe);
-	struct pipe_transfer *transfer;
-	void *map;
-
-	map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
-	if (!ib) {
-		NOUVEAU_ERR("failed mapping ib\n");
-		return;
-	}
+	struct nouveau_channel* chan = nvfx->screen->base.channel;
+	unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+	struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
+	ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
 
-	assert(ib_bias == 0);
-
-	switch (ib_size) {
-	case 1:
-		nvfx_draw_elements_u08(nvfx, map, mode, start, count);
-		break;
-	case 2:
-		nvfx_draw_elements_u16(nvfx, map, mode, start, count);
-		break;
-	case 4:
-		nvfx_draw_elements_u32(nvfx, map, mode, start, count);
-		break;
-	default:
-		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
-		break;
-	}
+	assert(nvfx->screen->index_buffer_reloc_flags);
 
-	pipe_buffer_unmap(pipe, ib, transfer);
+	MARK_RING(chan, 3, 3);
+	if(ib_flags & NOUVEAU_BO_DUMMY)
+		OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
+	else
+		OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+	OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+	OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+			0, NV34TCL_IDXBUF_FORMAT_DMA1);
 }
 
-static void
-nvfx_draw_elements_vbo(struct pipe_context *pipe,
-		       unsigned mode, unsigned start, unsigned count)
+void
+nvfx_idxbuf_validate(struct nvfx_context* nvfx)
 {
-	struct nvfx_context *nvfx = nvfx_context(pipe);
-	struct nvfx_screen *screen = nvfx->screen;
-	struct nouveau_channel *chan = screen->base.channel;
-	unsigned restart = 0;
-
-	while (count) {
-		unsigned nr, vc, avail;
-
-		nvfx_state_emit(nvfx);
+	nvfx_idxbuf_emit(nvfx, 0);
+}
 
-		avail = AVAIL_RING(chan);
-		avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+void
+nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
+{
+	nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
+}
 
-		vc = nouveau_vbuf_split(avail, 6, 256,
-					mode, start, count, &restart);
-		if (!vc) {
-			FIRE_RING(chan);
-			continue;
-		}
+unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
+{
+	[PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+	[PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+	[PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+	[PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+	[PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+	[PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+	[PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+	[PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+	[PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+	[PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+	[PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+	[PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+	[PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED,
+	[PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+	[PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+	[PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+	[PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+	[PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+	[PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+	[PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+	[PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+};
+
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nvfx_context* nvfx = nvfx_context(pipe);
+	struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+        struct translate_key transkey;
+        unsigned per_vertex_size[16];
+        memset(per_vertex_size, 0, sizeof(per_vertex_size));
+
+        unsigned vb_compacted_index[16];
+
+	assert(num_elements < 16); /* not doing fallbacks yet */
+
+	memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
+	cso->num_elements = num_elements;
+	cso->needs_translate = FALSE;
+
+	transkey.nr_elements = 0;
+	transkey.output_stride = 0;
+
+	for(unsigned i = 0; i < num_elements; ++i)
+        {
+		const struct pipe_vertex_element* ve = &elements[i];
+		if(!ve->instance_divisor)
+                        per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
+        }
+
+        for(unsigned i = 0; i < 16; ++i)
+        {
+                if(per_vertex_size[i])
+                {
+                        unsigned idx = cso->num_per_vertex_buffer_infos++;
+                        cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
+                        cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
+                        vb_compacted_index[i] = idx;
+                }
+        }
+
+	for(unsigned i = 0; i < num_elements; ++i)
+	{
+		const struct pipe_vertex_element* ve = &elements[i];
+		unsigned type = nvfx_vertex_formats[ve->src_format];
+		unsigned ncomp = util_format_get_nr_components(ve->src_format);
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, nvgl_primitive(mode));
+		//if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
+		if(ve->instance_divisor)
+		{
+			struct nvfx_low_frequency_element* lfve;
+			cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;
+
+			//if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
+			if(0)
+				lfve = &cso->constant[cso->num_constant++];
+			else
+			{
+				lfve = &cso->per_instance[cso->num_per_instance++].base;
+				((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
+			}
 
-		nr = (vc & 0xff);
-		if (nr) {
-			OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
-			OUT_RING  (chan, ((nr - 1) << 24) | start);
-			start += nr;
+                        lfve->idx = i;
+                        lfve->vertex_buffer_index = ve->vertex_buffer_index;
+                        lfve->src_offset = ve->src_offset;
+                        lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
+                        lfve->ncomp = ncomp;
 		}
-
-		nr = vc >> 8;
-		while (nr) {
-			unsigned push = nr > 2047 ? 2047 : nr;
-
-			nr -= push;
-
-			OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
-			while (push--) {
-				OUT_RING(chan, ((0x100 - 1) << 24) | start);
-				start += 0x100;
+		else
+		{
+			unsigned idx;
+
+			idx = cso->num_per_vertex++;
+			cso->per_vertex[idx].idx = i;
+			cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
+			cso->per_vertex[idx].src_offset = ve->src_offset;
+
+			idx = transkey.nr_elements++;
+			transkey.element[idx].input_format = ve->src_format;
+			transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
+			transkey.element[idx].input_offset = ve->src_offset;
+			transkey.element[idx].instance_divisor = 0;
+			transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
+			if(type)
+			{
+				transkey.element[idx].output_format = ve->src_format;
+				cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
+			}
+			else
+			{
+				unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
+				transkey.element[idx].output_format = float32[ncomp - 1];
+				cso->needs_translate = TRUE;
+				cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
 			}
+			transkey.element[idx].output_offset = transkey.output_stride;
+			transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
 		}
+	}
 
-		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-		OUT_RING  (chan, 0);
+	cso->translate = translate_generic_create(&transkey);
+	cso->vertex_length = transkey.output_stride >> 2;
+	cso->max_vertices_per_packet = 2047 / cso->vertex_length;
 
-		count -= vc;
-		start = restart;
-	}
+	return (void *)cso;
 }
 
 static void
-nvfx_draw_elements(struct pipe_context *pipe,
-		   struct pipe_resource *indexBuffer,
-		   unsigned indexSize, int indexBias,
-		   unsigned mode, unsigned start, unsigned count)
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) /* vertex-elements CSO delete hook; pipe is not used */
 {
-	struct nvfx_context *nvfx = nvfx_context(pipe);
-	boolean idxbuf;
-
-	idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
-	if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-		nvfx_draw_elements_swtnl(pipe,
-		                         indexBuffer, indexSize, indexBias,
-		                         mode, start, count);
-		return;
-	}
-
-	if (idxbuf) {
-		nvfx_draw_elements_vbo(pipe, mode, start, count);
-	} else {
-		nvfx_draw_elements_inline(pipe,
-		                          indexBuffer, indexSize, indexBias,
-					  mode, start, count);
-	}
-
-	pipe->flush(pipe, 0, NULL);
+	FREE(hwcso); /* NOTE(review): frees only the CSO; the cso->translate object made by translate_generic_create() in the create hook is not released here -- confirm whether it leaks */
 }
 
-void
-nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) /* vertex-elements CSO bind hook: hwcso becomes nvfx->vtxelt */
 {
 	struct nvfx_context *nvfx = nvfx_context(pipe);
 
-	if (info->indexed && nvfx->idxbuf.buffer) {
-		unsigned offset;
-
-		assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0);
-		offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size;
-
-		nvfx_draw_elements(pipe,
-				   nvfx->idxbuf.buffer,
-				   nvfx->idxbuf.index_size,
-				   info->index_bias,
-				   info->mode,
-				   info->start + offset,
-				   info->count);
-	}
-	else {
-		nvfx_draw_arrays(pipe,
-				info->mode,
-				info->start,
-				info->count);
-	}
+	nvfx->vtxelt = hwcso; /* stored as-is; no NULL check and no copy */
+	nvfx->use_vertex_buffers = -1; /* presumably -1 means "not decided yet" so the draw path re-chooses -- TODO confirm */
+	nvfx->draw_dirty |= NVFX_NEW_ARRAYS; /* only draw_dirty is touched here, not nvfx->dirty */
 }
 
-boolean
-nvfx_vbo_validate(struct nvfx_context *nvfx)
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+			const struct pipe_vertex_buffer *vb) /* copies the first count entries of vb into nvfx->vtxbuf */
 {
-	struct nouveau_channel* chan = nvfx->screen->base.channel;
-	struct pipe_resource *ib = nvfx->idxbuf_buffer;
-	unsigned ib_format = nvfx->idxbuf_format;
-	int i;
-	int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
-	uint32_t vtxfmt[16];
-	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
-
-	if (!elements)
-		return TRUE;
-
-	nvfx->vbo_bo = 0;
-
-	MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
-	for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-		struct pipe_vertex_element *ve;
-		struct pipe_vertex_buffer *vb;
-		unsigned type, ncomp;
-
-		ve = &nvfx->vtxelt->pipe[i];
-		vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-
-		if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
-			MARK_UNDO(chan);
-			nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
-			return FALSE;
-		}
+	struct nvfx_context *nvfx = nvfx_context(pipe);
 
-		if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
-			nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
-			vtxfmt[i] = type;
-		} else {
-			vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
-				(ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
-			nvfx->vbo_bo |= (1 << i);
-		}
+	for(unsigned i = 0; i < count; ++i) /* NOTE(review): count is not checked against the capacity of nvfx->vtxbuf -- confirm callers bound it */
+	{
+		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); /* the buffer goes through the reference helper; the three fields below are plain copies */
+		nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+		nvfx->vtxbuf[i].max_index = vb[i].max_index;
+		nvfx->vtxbuf[i].stride = vb[i].stride;
 	}
 
-	for(; i < elements; ++i)
-		vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
-
-	OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
-	OUT_RINGp(chan, vtxfmt, elements);
-
-	if(nvfx->is_nv4x) {
-		unsigned i;
-		/* seems to be some kind of cache flushing */
-		for(i = 0; i < 3; ++i) {
-			OUT_RING(chan, RING_3D(0x1718, 1));
-			OUT_RING(chan, 0);
-		}
-	}
+	for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) /* slots in use under the old count (vtxbuf_nr) but beyond the new one */
+		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); /* presumably drops the stale buffer reference -- TODO confirm helper semantics */
 
-	OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
-	for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-		struct pipe_vertex_element *ve;
-		struct pipe_vertex_buffer *vb;
+	nvfx->vtxbuf_nr = count; /* updated only after the loop above has used the old value */
+	nvfx->use_vertex_buffers = -1; /* same reset as in nvfx_vtxelts_state_bind */
+	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
 
-		ve = &nvfx->vtxelt->pipe[i];
-		vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+static void
+nvfx_set_index_buffer(struct pipe_context *pipe,
+		      const struct pipe_index_buffer *ib) /* ib may be NULL, which clears the cached index buffer state */
+{
+	struct nvfx_context *nvfx = nvfx_context(pipe);
 
-		if (!(nvfx->vbo_bo & (1 << i)))
-			OUT_RING(chan, 0);
-		else
-		{
-			struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-			OUT_RELOC(chan, bo,
-				 vb->buffer_offset + ve->src_offset,
-				 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-				 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-		}
+	if(ib) /* bind: mirror buffer, index_size and offset into nvfx->idxbuf */
+	{
+		pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
+		nvfx->idxbuf.index_size = ib->index_size;
+		nvfx->idxbuf.offset = ib->offset;
 	}
-
-        for (; i < elements; i++)
-		OUT_RING(chan, 0);
-
-	OUT_RING(chan, RING_3D(0x1710, 1));
-	OUT_RING(chan, 0);
-
-	if (ib) {
-		unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
-		struct nouveau_bo* bo = nvfx_resource(ib)->bo;
-
-		assert(nvfx->screen->index_buffer_reloc_flags);
-
-		OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
-		OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
-		OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
-				  0, NV34TCL_IDXBUF_FORMAT_DMA1);
+	else /* unbind: reset the buffer pointer and zero index_size/offset */
+	{
+		pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+		nvfx->idxbuf.index_size = 0;
+		nvfx->idxbuf.offset = 0;
 	}
 
-	nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
-	return TRUE;
+	nvfx->dirty |= NVFX_NEW_INDEX; /* unlike the vertex-buffer and vertex-elements hooks, this sets nvfx->dirty as well as draw_dirty */
+	nvfx->draw_dirty |= NVFX_NEW_INDEX;
 }
 
 void
-nvfx_vbo_relocate(struct nvfx_context *nvfx)
+nvfx_init_vbo_functions(struct nvfx_context *nvfx) /* installs this file's static hooks into nvfx->pipe */
 {
-	struct nouveau_channel* chan = nvfx->screen->base.channel;
-	unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-	int i;
+	nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; /* buffer binding hooks */
+	nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
 
-	MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
-	for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
-		if(nvfx->vbo_bo & (1 << i)) {
-			struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
-			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-			struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-			OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
-					vb_flags, 0, 0);
-			OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
-					vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-					0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-		}
-	}
-
-	if(nvfx->idxbuf_buffer)
-	{
-		unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-		struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo;
-
-		assert(nvfx->screen->index_buffer_reloc_flags);
-
-		OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
-				ib_flags, 0, 0);
-		OUT_RELOC(chan, bo, 0,
-				ib_flags | NOUVEAU_BO_LOW, 0, 0);
-		OUT_RELOC(chan, bo, nvfx->idxbuf_format,
-				ib_flags | NOUVEAU_BO_OR,
-				0, NV34TCL_IDXBUF_FORMAT_DMA1);
-	}
+	nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; /* vertex-elements CSO lifecycle: create / delete / bind */
+	nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+	nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
 }
author	Luca Barbieri <luca@luca-barbieri.com>	2010-08-07 05:39:18 +0200
committer	Luca Barbieri <luca@luca-barbieri.com>	2010-08-21 20:42:14 +0200
commit	8eb0fc430a8c1687627156a06faf5762144022f3 (patch)
tree	faddeeecd24e26c1d92d9aeeeb5e4ba0dd276e96 /src/gallium/drivers/nvfx/nvfx_vbo.c
parent	73b7c6fb336ad3e717f8e961f4e2df761e94cd2f (diff)