From c2ad9e380121d43f8ac066b590f9929a56bd1b0f Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 17 Feb 2010 16:41:30 +0100
Subject: util: new util_format_get_nr_components helper

---
 src/gallium/auxiliary/util/u_format.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 2fbbb83d4b..6345a11a8c 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -404,6 +404,23 @@ util_format_has_alpha(enum pipe_format format)
    }
 }
 
+/**
+ * Count the channels of a format that hold data (channel size != 0).
+ * Formats with block size != 1x1 will always have 1 component (the block).
+ */
+static INLINE unsigned
+util_format_get_nr_components(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   unsigned nr_comp = 0;
+   unsigned chan = 0;
+   while (chan < 4) {
+      /* channel slots whose size is 0 are not counted */
+      nr_comp += (desc->channel[chan].size != 0) ? 1 : 0;
+      chan++;
+   }
+   return nr_comp;
+}
 
 /*
  * Format access functions.
-- 
cgit v1.2.3


From 464a72dd4154f314e08c9d0c4d07417e2bf255f0 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 17 Feb 2010 16:44:38 +0100
Subject: gallium: remove redundant nr_components field from
 pipe_vertex_element

This is a property of the associated src_format pipe format.
Hence use util_format_get_nr_components to query this when necessary instead.
---
 progs/gallium/python/retrace/interpreter.py      |  1 -
 src/gallium/auxiliary/draw/draw_pt.c             |  5 ++---
 src/gallium/auxiliary/util/u_draw_quad.c         |  1 -
 src/gallium/auxiliary/util/u_dump_state.c        |  1 -
 src/gallium/auxiliary/vl/vl_compositor.c         |  2 --
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  8 -------
 src/gallium/drivers/i965/brw_draw_upload.c       |  5 ++++-
 src/gallium/drivers/nv50/nv50_vbo.c              | 27 ++++++++++++++----------
 src/gallium/drivers/trace/tr_dump_state.c        |  1 -
 src/gallium/include/pipe/p_state.h               |  1 -
 src/gallium/state_trackers/vega/polygon.c        |  1 -
 src/mesa/state_tracker/st_draw.c                 |  3 ---
 src/mesa/state_tracker/st_draw_feedback.c        |  1 -
 13 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/progs/gallium/python/retrace/interpreter.py b/progs/gallium/python/retrace/interpreter.py
index 2db71a212f..52c08abdd3 100755
--- a/progs/gallium/python/retrace/interpreter.py
+++ b/progs/gallium/python/retrace/interpreter.py
@@ -551,7 +551,6 @@ class Context(Object):
                 data = vbuf.buffer.read()
                 values = unpack_from(format, data, offset)
                 sys.stdout.write('\t\t{' + ', '.join(map(str, values)) + '},\n')
-                assert len(values) == velem.nr_components
             sys.stdout.write('\t},\n')
         sys.stdout.flush()
 
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 341353f628..51beba50e4 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -309,9 +309,8 @@ draw_arrays_instanced(struct draw_context *draw,
       tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
       debug_printf("Elements:\n");
       for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
-         debug_printf("  format=%s comps=%u\n",
-                      util_format_name(draw->pt.vertex_element[i].src_format),
-                      draw->pt.vertex_element[i].nr_components);
+         debug_printf("  format=%s\n",
+                      util_format_name(draw->pt.vertex_element[i].src_format));
       }
       debug_printf("Buffers:\n");
       for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 14506e8451..36e9a4941f 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -64,7 +64,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
       velements[i].instance_divisor = 0;
       velements[i].vertex_buffer_index = 0;
       velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velements[i].nr_components = 4;
    }
    pipe->set_vertex_elements(pipe, num_attribs, velements);
 
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index eaf4ec90f2..196c5e06e9 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -701,7 +701,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem
    util_dump_member(stream, uint, state, src_offset);
 
    util_dump_member(stream, uint, state, vertex_buffer_index);
-   util_dump_member(stream, uint, state, nr_components);
 
    util_dump_member(stream, format, state, src_format);
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index ba23435f69..5d61423f9d 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -317,7 +317,6 @@ init_buffers(struct vl_compositor *c)
    c->vertex_elems[0].src_offset = 0;
    c->vertex_elems[0].instance_divisor = 0;
    c->vertex_elems[0].vertex_buffer_index = 0;
-   c->vertex_elems[0].nr_components = 2;
    c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /*
@@ -347,7 +346,6 @@ init_buffers(struct vl_compositor *c)
    c->vertex_elems[1].src_offset = 0;
    c->vertex_elems[1].instance_divisor = 0;
    c->vertex_elems[1].vertex_buffer_index = 1;
-   c->vertex_elems[1].nr_components = 2;
    c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /*
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f323de0ea5..6b9ecd4268 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -892,56 +892,48 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->vertex_elems[0].src_offset = 0;
    r->vertex_elems[0].instance_divisor = 0;
    r->vertex_elems[0].vertex_buffer_index = 0;
-   r->vertex_elems[0].nr_components = 2;
    r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Luma, texcoord element */
    r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
    r->vertex_elems[1].instance_divisor = 0;
    r->vertex_elems[1].vertex_buffer_index = 0;
-   r->vertex_elems[1].nr_components = 2;
    r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Chroma Cr texcoord element */
    r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
    r->vertex_elems[2].instance_divisor = 0;
    r->vertex_elems[2].vertex_buffer_index = 0;
-   r->vertex_elems[2].nr_components = 2;
    r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Chroma Cb texcoord element */
    r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
    r->vertex_elems[3].instance_divisor = 0;
    r->vertex_elems[3].vertex_buffer_index = 0;
-   r->vertex_elems[3].nr_components = 2;
    r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface top field texcoord element */
    r->vertex_elems[4].src_offset = 0;
    r->vertex_elems[4].instance_divisor = 0;
    r->vertex_elems[4].vertex_buffer_index = 1;
-   r->vertex_elems[4].nr_components = 2;
    r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface bottom field texcoord element */
    r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
    r->vertex_elems[5].instance_divisor = 0;
    r->vertex_elems[5].vertex_buffer_index = 1;
-   r->vertex_elems[5].nr_components = 2;
    r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface top field texcoord element */
    r->vertex_elems[6].src_offset = 0;
    r->vertex_elems[6].instance_divisor = 0;
    r->vertex_elems[6].vertex_buffer_index = 2;
-   r->vertex_elems[6].nr_components = 2;
    r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface bottom field texcoord element */
    r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
    r->vertex_elems[7].instance_divisor = 0;
    r->vertex_elems[7].vertex_buffer_index = 2;
-   r->vertex_elems[7].nr_components = 2;
    r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    r->vs_const_buf = pipe_buffer_create
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index d59261557b..9eafdd4085 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -30,6 +30,7 @@
 
 #include "util/u_upload_mgr.h"
 #include "util/u_math.h"
+#include "util/u_format.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
@@ -352,13 +353,15 @@ static int brw_emit_vertex_elements(struct brw_context *brw)
    OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
    for (i = 0; i < nr; i++) {
       const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
+      unsigned nr_components = util_format_get_nr_components(input->src_format);
+
       uint32_t format = brw_translate_surface_format( input->src_format );
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
 
-      switch (input->nr_components) {
+      switch (nr_components) {
       case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
       case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
       case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 1c8ee0b9ad..909d323e05 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -139,15 +139,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 	uint32_t hw_type, hw_size;
 	enum pipe_format pf = ve->src_format;
 	const struct util_format_description *desc;
-	unsigned size;
+	unsigned size, nr_components;
 
 	desc = util_format_description(pf);
 	assert(desc);
 
 	size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+	nr_components = util_format_get_nr_components(pf);
 
 	hw_type = nv50_vbo_type_to_hw(pf);
-	hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
+	hw_size = nv50_vbo_size_to_hw(size, nr_components);
 
 	if (!hw_type || !hw_size) {
 		NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
@@ -222,11 +223,13 @@ nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_channel *chan = tesla->channel;
 	float v[4];
+	unsigned nr_components = util_format_get_nr_components(nv50->vtxelt[i].src_format);
+	
 
 	util_format_read_4f(nv50->vtxelt[i].src_format,
 			    v, 0, data, 0, 0, 0, 1, 1);
 
-	switch (nv50->vtxelt[i].nr_components) {
+	switch (nr_components) {
 	case 4:
 		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
 		OUT_RINGf (chan, v[0]);
@@ -726,6 +729,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 	struct nouveau_bo *bo = nouveau_bo(vb->buffer);
 	float v[4];
 	int ret;
+	unsigned nr_components = util_format_get_nr_components(ve->src_format);
 
 	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
 	if (ret)
@@ -738,7 +742,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 	if (!so)
 		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
 
-	switch (ve->nr_components) {
+	switch (nr_components) {
 	case 4:
 		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
 		so_data  (so, fui(v[0]));
@@ -1019,7 +1023,7 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 	for (i = 0; i < nv50->vtxelt_nr; ++i) {
 		struct pipe_vertex_element *ve;
 		struct pipe_vertex_buffer *vb;
-		unsigned n, size;
+		unsigned n, size, nr_components;
 		const struct util_format_description *desc;
 
 		ve = &nv50->vtxelt[i];
@@ -1038,8 +1042,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 
 		size = util_format_get_component_bits(
 			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+		nr_components = util_format_get_nr_components(ve->src_format);
 
-		assert(ve->nr_components > 0 && ve->nr_components <= 4);
+		assert(nr_components > 0 && nr_components <= 4);
 
 		/* It shouldn't be necessary to push the implicit 1s
 		 * for case 3 and size 8 cases 1, 2, 3.
@@ -1049,25 +1054,25 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
 			return FALSE;
 		case 32:
-			switch (ve->nr_components) {
+			switch (nr_components) {
 			case 1: emit->push[n] = emit_b32_1; break;
 			case 2: emit->push[n] = emit_b32_2; break;
 			case 3: emit->push[n] = emit_b32_3; break;
 			case 4: emit->push[n] = emit_b32_4; break;
 			}
-			emit->vtx_dwords += ve->nr_components;
+			emit->vtx_dwords += nr_components;
 			break;
 		case 16:
-			switch (ve->nr_components) {
+			switch (nr_components) {
 			case 1: emit->push[n] = emit_b16_1; break;
 			case 2: emit->push[n] = emit_b32_1; break;
 			case 3: emit->push[n] = emit_b16_3; break;
 			case 4: emit->push[n] = emit_b32_2; break;
 			}
-			emit->vtx_dwords += (ve->nr_components + 1) >> 1;
+			emit->vtx_dwords += (nr_components + 1) >> 1;
 			break;
 		case 8:
-			switch (ve->nr_components) {
+			switch (nr_components) {
 			case 1: emit->push[n] = emit_b08_1; break;
 			case 2: emit->push[n] = emit_b16_1; break;
 			case 3: emit->push[n] = emit_b08_3; break;
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 6da186a655..b9ccc0ca69 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -480,7 +480,6 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state)
    trace_dump_member(uint, state, src_offset);
 
    trace_dump_member(uint, state, vertex_buffer_index);
-   trace_dump_member(uint, state, nr_components);
 
    trace_dump_member(format, state, src_format);
 
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 5ac5c87813..5670f7a088 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -383,7 +383,6 @@ struct pipe_vertex_element
     * this attribute live in?
     */
    unsigned vertex_buffer_index:8;
-   unsigned nr_components:8;
  
    enum pipe_format src_format; 	   /**< PIPE_FORMAT_* */
 };
diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c
index c06dbf5206..96fbbfc85a 100644
--- a/src/gallium/state_trackers/vega/polygon.c
+++ b/src/gallium/state_trackers/vega/polygon.c
@@ -296,7 +296,6 @@ static void draw_polygon(struct vg_context *ctx,
    velement.instance_divisor = 0;
    velement.vertex_buffer_index = 0;
    velement.src_format = PIPE_FORMAT_R32G32_FLOAT;
-   velement.nr_components = COMPONENTS;
    pipe->set_vertex_elements(pipe, 1, &velement);
 
    /* draw */
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 4b48c168e9..397dddbb47 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -368,7 +368,6 @@ setup_interleaved_attribs(GLcontext *ctx,
          (unsigned) (arrays[mesaAttr]->Ptr - offset0);
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = 0;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format =
          st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                arrays[mesaAttr]->Size,
@@ -458,7 +457,6 @@ setup_non_interleaved_attribs(GLcontext *ctx,
       vbuffer[attr].max_index = max_index;
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = attr;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format
          = st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                  arrays[mesaAttr]->Size,
@@ -596,7 +594,6 @@ st_draw_vbo(GLcontext *ctx,
       for (i = 0; i < num_velements; i++) {
          printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
          printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
-         printf("vlements[%d].nr_comps = %u\n", i, velements[i].nr_components);
          printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
       }
    }
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index 087f2f22bb..26a5b3fcd6 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -178,7 +178,6 @@ st_feedback_draw_vbo(GLcontext *ctx,
       vbuffers[attr].max_index = max_index;
       velements[attr].instance_divisor = 0;
       velements[attr].vertex_buffer_index = attr;
-      velements[attr].nr_components = arrays[mesaAttr]->Size;
       velements[attr].src_format = 
          st_pipe_vertex_format(arrays[mesaAttr]->Type,
                                arrays[mesaAttr]->Size,
-- 
cgit v1.2.3


From 51d139f03898e5e46af6363c6bba131455738cc4 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 18 Feb 2010 16:36:25 +0100
Subject: gallium: use cso state handling for pipe_vertex_element state

---
 src/gallium/auxiliary/cso_cache/cso_cache.c   | 21 ++++++++
 src/gallium/auxiliary/cso_cache/cso_cache.h   | 11 +++-
 src/gallium/auxiliary/cso_cache/cso_context.c | 74 +++++++++++++++++++++++++++
 src/gallium/auxiliary/cso_cache/cso_context.h |  7 ++-
 src/gallium/include/pipe/p_context.h          |  9 ++--
 5 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index a6a07e72c2..900c64df4b 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -43,6 +43,7 @@ struct cso_cache {
    struct cso_hash *vs_hash;
    struct cso_hash *rasterizer_hash;
    struct cso_hash *sampler_hash;
+   struct cso_hash *velements_hash;
    int    max_size;
 
    cso_sanitize_callback sanitize_cb;
@@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    return hash;
@@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data)
    FREE(state);
 }
 
+static void delete_velements(void *state, void *data)
+{
+   struct cso_velements *velems = (struct cso_velements *)state;   /* cache entry to destroy */
+   if (velems->delete_state != NULL)   /* release the driver object when a deleter was recorded */
+      velems->delete_state(velems->context, velems->data);
+   FREE(velems);
+}
 
 static INLINE void delete_cso(void *state, enum cso_cache_type type)
 {
@@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
    case CSO_VERTEX_SHADER:
       delete_vs_state(state, 0);
       break;
+   case CSO_VELEMENTS:
+      delete_velements(state, 0);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void)
    sc->rasterizer_hash    = cso_hash_create();
    sc->fs_hash            = cso_hash_create();
    sc->vs_hash            = cso_hash_create();
+   sc->velements_hash     = cso_hash_create();
    sc->sanitize_cb        = sanitize_cb;
    sc->sanitize_data      = 0;
 
@@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
    case CSO_VERTEX_SHADER:
       hash = sc->vs_hash;
       break;
+   case CSO_VELEMENTS:
+      hash = sc->velements_hash;
+      break;
    }
 
    iter = cso_hash_first_node(hash);
@@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
    cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
    cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+   cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
 
    cso_hash_delete(sc->blend_hash);
    cso_hash_delete(sc->sampler_hash);
@@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc)
    cso_hash_delete(sc->rasterizer_hash);
    cso_hash_delete(sc->fs_hash);
    cso_hash_delete(sc->vs_hash);
+   cso_hash_delete(sc->velements_hash);
    FREE(sc);
 }
 
@@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
    sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
    sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
    sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+   sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size);
 }
 
 int cso_maximum_cache_size(const struct cso_cache *sc)
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index eea60b940b..d884d5410f 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -53,6 +53,7 @@
   * - rasterizer (old setup)
   * - sampler
   * - vertex shader
+  * - vertex elements
   *
   * Things that are not constant state objects include:
   * - blend_color
@@ -90,7 +91,8 @@ enum cso_cache_type {
    CSO_DEPTH_STENCIL_ALPHA,
    CSO_RASTERIZER,
    CSO_FRAGMENT_SHADER,
-   CSO_VERTEX_SHADER
+   CSO_VERTEX_SHADER,
+   CSO_VELEMENTS
 };
 
 typedef void (*cso_state_callback)(void *ctx, void *obj);
@@ -144,6 +146,13 @@ struct cso_sampler {
    struct pipe_context *context;
 };
 
+struct cso_velements {
+   struct pipe_vertex_element state[PIPE_MAX_ATTRIBS];   /* copy of the caller's elements; its leading bytes are what cache lookups are keyed on */
+   void *data;   /* driver object returned by create_vertex_elements_state() */
+   cso_state_callback delete_state;   /* driver destructor invoked on 'data'; may be NULL */
+   struct pipe_context *context;   /* pipe context handed to delete_state */
+};
+
 unsigned cso_construct_key(void *item, int item_size);
 
 struct cso_cache *cso_cache_create(void);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index b5241fa64c..95e3c18e53 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -89,6 +89,7 @@ struct cso_context {
    void *rasterizer, *rasterizer_saved;
    void *fragment_shader, *fragment_shader_saved, *geometry_shader;
    void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
+   void *velements, *velements_saved;
 
    struct pipe_framebuffer_state fb, fb_saved;
    struct pipe_viewport_state vp, vp_saved;
@@ -171,6 +172,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state)
    return FALSE;
 }
 
+static boolean delete_vertex_elements(struct cso_context *ctx,
+                                      void *state)
+{
+   struct cso_velements *velems = (struct cso_velements *)state;
+   /* Refuse to destroy the entry whose driver object is currently bound. */
+   if (velems->data != ctx->velements) {
+      if (velems->delete_state)
+         velems->delete_state(velems->context, velems->data);
+      FREE(velems);
+      return TRUE;
+   }
+   return FALSE;
+}
+
 
 static INLINE boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
@@ -194,6 +209,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
    case CSO_VERTEX_SHADER:
       return delete_vs_state(ctx, state);
       break;
+   case CSO_VELEMENTS:
+      return delete_vertex_elements(ctx, state);
+      break;
    default:
       assert(0);
       FREE(state);
@@ -1126,3 +1144,59 @@ void cso_restore_geometry_shader(struct cso_context *ctx)
    }
    ctx->geometry_shader_saved = NULL;
 }
+
+/** Bind the vertex-elements CSO matching 'states', creating and caching it on a miss.
+ *  Returns PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the entry cannot be allocated or cached. */
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states)
+{
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+   /* The lookup key is the raw bytes of the first 'count' elements. */
+   key_size = sizeof(struct pipe_vertex_element) * count;
+   hash_key = cso_construct_key((void*)states, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)states, key_size);
+   /* Miss: build a cache entry and let the driver create its object. */
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      memcpy(&cso->state, states, key_size);   /* only the first 'count' slots are written */
+      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state[0]);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
+      cso->context = ctx->pipe;
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         if (cso->delete_state)   /* entry was never cached: free the driver object here */
+            cso->delete_state(cso->context, cso->data);
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
+   }
+   if (ctx->velements != handle) {   /* skip the driver call when already bound */
+      ctx->velements = handle;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_vertex_elements(struct cso_context *ctx)
+{
+   assert(!ctx->velements_saved);   /* save slot is one deep: a previous save must have been restored */
+   ctx->velements_saved = ctx->velements;   /* kept until cso_restore_vertex_elements() */
+}
+
+void cso_restore_vertex_elements(struct cso_context *ctx)   /* re-bind the state stashed by cso_save_vertex_elements() */
+{
+   if (ctx->velements != ctx->velements_saved) {   /* only call the driver when the binding actually changed */
+      ctx->velements = ctx->velements_saved;
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
+   }
+   ctx->velements_saved = NULL;   /* empty the save slot */
+}
\ No newline at end of file
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 707b3c2cee..2caa587516 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -122,6 +122,12 @@ void
 cso_restore_vertex_sampler_textures(struct cso_context *cso);
 
 
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
 
 /* These aren't really sensible -- most of the time the api provides
  * object semantics for shaders anyway, and the cases where it doesn't
@@ -157,7 +163,6 @@ void cso_save_geometry_shader(struct cso_context *cso);
 void cso_restore_geometry_shader(struct cso_context *cso);
 
 
-
 enum pipe_error cso_set_framebuffer(struct cso_context *cso,
                                     const struct pipe_framebuffer_state *fb);
 void cso_save_framebuffer(struct cso_context *cso);
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f82b77903e..376b01aa69 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -177,6 +177,12 @@ struct pipe_context {
    void   (*bind_gs_state)(struct pipe_context *, void *);
    void   (*delete_gs_state)(struct pipe_context *, void *);
 
+   void * (*create_vertex_elements_state)(struct pipe_context *,
+                                          unsigned num_elements,
+                                          const struct pipe_vertex_element *);
+   void   (*bind_vertex_elements_state)(struct pipe_context *, void *);
+   void   (*delete_vertex_elements_state)(struct pipe_context *, void *);
+
    /*@}*/
 
    /**
@@ -220,9 +226,6 @@ struct pipe_context {
                                unsigned num_buffers,
                                const struct pipe_vertex_buffer * );
 
-   void (*set_vertex_elements)( struct pipe_context *,
-                                unsigned num_elements,
-                                const struct pipe_vertex_element * );
    /*@}*/
 
 
-- 
cgit v1.2.3


From f2656c3e3cc91edcbf572d175efe9346a30b1da2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 26 Feb 2010 14:16:46 +0100
Subject: nv50: adapt to vertex elements cso

---
 src/gallium/drivers/nv50/nv50_context.h | 10 +++-
 src/gallium/drivers/nv50/nv50_state.c   | 34 ++++++++++---
 src/gallium/drivers/nv50/nv50_vbo.c     | 84 +++++++++++++++++++--------------
 3 files changed, 85 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index b4de3e2ba5..811b3ef9fe 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -72,6 +72,12 @@ struct nv50_sampler_stateobj {
 	unsigned tsc[8];
 };
 
+struct nv50_vtxelt_stateobj {
+	struct pipe_vertex_element pipe[16];	/* copy of the elements passed at create time */
+	unsigned num_elements;	/* number of entries of pipe[] that were copied in */
+	uint32_t hw[16];	/* NOTE(review): presumably per-element hw words filled by nv50_vtxelt_construct() -- confirm */
+};
+
 static INLINE unsigned
 get_tile_height(uint32_t tile_mode)
 {
@@ -168,8 +174,7 @@ struct nv50_context {
 	struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
 	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
 	unsigned vtxbuf_nr;
-	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-	unsigned vtxelt_nr;
+	struct nv50_vtxelt_stateobj *vtxelt;
 	struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 	unsigned sampler_nr[PIPE_SHADER_TYPES];
 	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
@@ -217,6 +222,7 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
 					 unsigned count,
 					 unsigned startInstance,
 					 unsigned instanceCount);
+extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
 extern void nv50_vbo_validate(struct nv50_context *nv50);
 
 /* nv50_clear.c */
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 7d304907b6..ffbf3473a1 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -720,15 +720,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
 	nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
+/* Create a vertex-elements CSO holding a copy of 'elements'; returns NULL if allocation fails. */
+static void *
+nv50_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
+	if (!cso)
+		return NULL;
+	assert(num_elements < 16); /* not doing fallbacks yet; NOTE(review): arrays hold 16, confirm '<' vs '<=' */
+	cso->num_elements = num_elements;
+	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+	nv50_vtxelt_construct(cso);
+	return (void *)cso;
+}
+
 static void
-nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-			 const struct pipe_vertex_element *ve)
+nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-	struct nv50_context *nv50 = nv50_context(pipe);
+	FREE(hwcso);
+}
 
-	memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
-	nv50->vtxelt_nr = count;
+static void
+nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv50_context *nv50 = nv50_context(pipe);
 
+	nv50->vtxelt = hwcso;
 	nv50->dirty |= NV50_NEW_ARRAYS;
 }
 
@@ -778,7 +797,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
 	nv50->pipe.set_scissor_state = nv50_set_scissor_state;
 	nv50->pipe.set_viewport_state = nv50_set_viewport_state;
 
+	nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
+	nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
+	nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
+
 	nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
-	nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 909d323e05..c1dcb93b48 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -223,11 +223,10 @@ nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_channel *chan = tesla->channel;
 	float v[4];
-	unsigned nr_components = util_format_get_nr_components(nv50->vtxelt[i].src_format);
-	
+	enum pipe_format pf = nv50->vtxelt->pipe[i].src_format;
+	unsigned nr_components = util_format_get_nr_components(pf);
 
-	util_format_read_4f(nv50->vtxelt[i].src_format,
-			    v, 0, data, 0, 0, 0, 1, 1);
+	util_format_read_4f(pf, v, 0, data, 0, 0, 0, 1, 1);
 
 	switch (nr_components) {
 	case 4:
@@ -266,16 +265,17 @@ init_per_instance_arrays_immd(struct nv50_context *nv50,
 	struct nouveau_bo *bo;
 	unsigned i, b, count = 0;
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
+	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
+		if (!nv50->vtxelt->pipe[i].instance_divisor)
 			continue;
 		++count;
-		b = nv50->vtxelt[i].vertex_buffer_index;
+		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
 
-		pos[i] = nv50->vtxelt[i].src_offset +
+		pos[i] = nv50->vtxelt->pipe[i].src_offset +
 			nv50->vtxbuf[b].buffer_offset +
 			startInstance * nv50->vtxbuf[b].stride;
-		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+		step[i] = startInstance %
+			nv50->vtxelt->pipe[i].instance_divisor;
 
 		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
 		if (!bo->map)
@@ -296,22 +296,22 @@ init_per_instance_arrays(struct nv50_context *nv50,
 	struct nouveau_channel *chan = tesla->channel;
 	struct nouveau_bo *bo;
 	struct nouveau_stateobj *so;
-	unsigned i, b, count = 0;
+	unsigned i, b, count = 0, num_elements = nv50->vtxelt->num_elements;
 	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 
 	if (nv50->vbo_fifo)
 		return init_per_instance_arrays_immd(nv50, startInstance,
 						     pos, step);
 
-	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+	so = so_new(num_elements, num_elements * 2, num_elements * 2);
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
+	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
+		if (!nv50->vtxelt->pipe[i].instance_divisor)
 			continue;
 		++count;
-		b = nv50->vtxelt[i].vertex_buffer_index;
+		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
 
-		pos[i] = nv50->vtxelt[i].src_offset +
+		pos[i] = nv50->vtxelt->pipe[i].src_offset +
 			nv50->vtxbuf[b].buffer_offset +
 			startInstance * nv50->vtxbuf[b].stride;
 
@@ -319,7 +319,8 @@ init_per_instance_arrays(struct nv50_context *nv50,
 			step[i] = 0;
 			continue;
 		}
-		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+		step[i] = startInstance %
+			nv50->vtxelt->pipe[i].instance_divisor;
 
 		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
 
@@ -344,12 +345,12 @@ step_per_instance_arrays_immd(struct nv50_context *nv50,
 	struct nouveau_bo *bo;
 	unsigned i, b;
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
+	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
+		if (!nv50->vtxelt->pipe[i].instance_divisor)
 			continue;
-		if (++step[i] != nv50->vtxelt[i].instance_divisor)
+		if (++step[i] != nv50->vtxelt->pipe[i].instance_divisor)
 			continue;
-		b = nv50->vtxelt[i].vertex_buffer_index;
+		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
 		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
 
 		step[i] = 0;
@@ -367,7 +368,7 @@ step_per_instance_arrays(struct nv50_context *nv50,
 	struct nouveau_channel *chan = tesla->channel;
 	struct nouveau_bo *bo;
 	struct nouveau_stateobj *so;
-	unsigned i, b;
+	unsigned i, b, num_elements = nv50->vtxelt->num_elements;
 	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 
 	if (nv50->vbo_fifo) {
@@ -375,14 +376,14 @@ step_per_instance_arrays(struct nv50_context *nv50,
 		return;
 	}
 
-	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+	so = so_new(num_elements, num_elements * 2, num_elements * 2);
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
+	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
+		if (!nv50->vtxelt->pipe[i].instance_divisor)
 			continue;
-		b = nv50->vtxelt[i].vertex_buffer_index;
+		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
 
-		if (++step[i] == nv50->vtxelt[i].instance_divisor) {
+		if (++step[i] == nv50->vtxelt->pipe[i].instance_divisor) {
 			step[i] = 0;
 			pos[i] += nv50->vtxbuf[b].stride;
 		}
@@ -740,7 +741,8 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 			    0, 0, 1, 1);
 	so = *pso;
 	if (!so)
-		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
+		*pso = so = so_new(nv50->vtxelt->num_elements,
+				   nv50->vtxelt->num_elements * 4, 0);
 
 	switch (nr_components) {
 	case 4:
@@ -778,6 +780,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 	return TRUE;
 }
 
+void
+nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
+{
+	unsigned i;
+
+	for (i = 0; i < cso->num_elements; ++i) {
+		struct pipe_vertex_element *ve = &cso->pipe[i];
+
+		cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
+	}
+}
+
 void
 nv50_vbo_validate(struct nv50_context *nv50)
 {
@@ -798,19 +812,19 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
 		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
 
-	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
+	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
 	vtxattr = NULL;
-	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
 	vtxfmt = so_new(1, n_ve, 0);
 	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
 
-	for (i = 0; i < nv50->vtxelt_nr; i++) {
-		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
 		struct pipe_vertex_buffer *vb =
 			&nv50->vtxbuf[ve->vertex_buffer_index];
 		struct nouveau_bo *bo = nouveau_bo(vb->buffer);
-		uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
+		uint32_t hw = nv50->vtxelt->hw[i];
 
 		if (!vb->stride &&
 		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
@@ -859,7 +873,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
 		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
 		so_data  (vtxbuf, 0);
 	}
-	nv50->state.vtxelt_nr = nv50->vtxelt_nr;
+	nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
 
 	so_ref (vtxfmt, &nv50->state.vtxfmt);
 	so_ref (vtxbuf, &nv50->state.vtxbuf);
@@ -1020,13 +1034,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 	emit->nr_ve = 0;
 	emit->vtx_dwords = 0;
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
+	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
 		struct pipe_vertex_element *ve;
 		struct pipe_vertex_buffer *vb;
 		unsigned n, size, nr_components;
 		const struct util_format_description *desc;
 
-		ve = &nv50->vtxelt[i];
+		ve = &nv50->vtxelt->pipe[i];
 		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
 		if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
 			continue;
-- 
cgit v1.2.3


From 18603a2f07b99bfdbaab35b38b292233fc3e7689 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:36:19 +0100
Subject: st/mesa: fix mesa state tracker adaptation to new vertex elements
 interface

---
 src/mesa/state_tracker/st_draw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 397dddbb47..15d1299a9e 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -57,6 +57,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "cso_cache/cso_context.h"
 
 
 static GLuint double_types[4] = {
@@ -600,7 +601,7 @@ st_draw_vbo(GLcontext *ctx,
 #endif
 
    pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer);
-   pipe->set_vertex_elements(pipe, num_velements, velements);
+   cso_set_vertex_elements(ctx->st->cso_context, num_velements, velements);
 
    if (num_vbuffers == 0 || num_velements == 0)
       return;
-- 
cgit v1.2.3


From ac4abaecd5f52e416c89bfe19b34ed7f4e014b21 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:40:22 +0100
Subject: util: adapt blitter code to new vertex element cso

The interface of util_draw_vertex_buffer looks a bit odd (the calling code has
to set the vertex elements but not the vertex buffers), but due to the way cso
state handling generally works (the original vertex element state can't easily
be re-bound there), I guess that's ok for now.
---
 src/gallium/auxiliary/util/u_blit.c       | 16 ++++++++++++++++
 src/gallium/auxiliary/util/u_blitter.c    | 23 ++++++++++++++++++++++-
 src/gallium/auxiliary/util/u_blitter.h    |  8 ++++++++
 src/gallium/auxiliary/util/u_draw_quad.c  | 11 +----------
 src/gallium/auxiliary/util/u_gen_mipmap.c | 13 +++++++++++++
 5 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index f0bc58a558..0d93e23df8 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -62,6 +62,7 @@ struct blit_state
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
    struct pipe_viewport_state viewport;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs[TGSI_WRITEMASK_XYZW + 1];
@@ -115,6 +116,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
    ctx->sampler.normalized_coords = 1;
 
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still required to provide the linkage between
     * fragment shader input semantics and vertex_element/buffers.
     */
@@ -410,11 +420,13 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -464,6 +476,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 
    pipe_texture_reference(&tex, NULL);
 }
@@ -547,11 +560,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* set misc state we care about */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    /* sampler */
    ctx->sampler.min_img_filter = filter;
@@ -596,4 +611,5 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 18f8606818..3542a2e444 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -88,6 +88,8 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
 
+   void *velem_state;
+
    /* Sampler state for clamping to a miplevel. */
    void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
 
@@ -102,6 +104,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
    struct pipe_rasterizer_state rs_state = { 0 };
    struct pipe_sampler_state *sampler_state;
+   struct pipe_vertex_element velem[2];
    unsigned i;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
@@ -116,6 +119,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
    ctx->blitter.saved_fb_state.nr_cbufs = ~0;
    ctx->blitter.saved_num_textures = ~0;
    ctx->blitter.saved_num_sampler_states = ~0;
@@ -165,6 +169,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    rs_state.flatshade = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
 
+   /* vertex elements state */
+   memset(&velem[0], 0, sizeof(velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      velem[i].src_offset = i * 4 * sizeof(float);
+      velem[i].instance_divisor = 0;
+      velem[i].vertex_buffer_index = 0;
+      velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+   ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
+
    /* fragment shaders are created on-demand */
 
    /* vertex shaders */
@@ -214,6 +228,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
    pipe->delete_vs_state(pipe, ctx->vs_tex);
+   pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
 
    for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
       if (ctx->fs_texfetch_col[i])
@@ -241,7 +256,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
           ctx->blitter.saved_dsa_state != INVALID_PTR &&
           ctx->blitter.saved_rs_state != INVALID_PTR &&
           ctx->blitter.saved_fs != INVALID_PTR &&
-          ctx->blitter.saved_vs != INVALID_PTR);
+          ctx->blitter.saved_vs != INVALID_PTR &&
+          ctx->blitter.saved_velem_state != INVALID_PTR);
 }
 
 static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -254,12 +270,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
    pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
    pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
+   pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
 
    ctx->blitter.saved_blend_state = INVALID_PTR;
    ctx->blitter.saved_dsa_state = INVALID_PTR;
    ctx->blitter.saved_rs_state = INVALID_PTR;
    ctx->blitter.saved_fs = INVALID_PTR;
    ctx->blitter.saved_vs = INVALID_PTR;
+   ctx->blitter.saved_velem_state = INVALID_PTR;
 
    pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
 
@@ -546,6 +564,7 @@ void util_blitter_clear(struct blitter_context *blitter,
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
@@ -611,6 +630,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter,
    pipe->bind_vs_state(pipe, ctx->vs_tex);
    pipe->bind_fragment_sampler_states(pipe, 1,
       blitter_get_sampler_state(ctx, src->level));
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
@@ -784,6 +804,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
    pipe->bind_vs_state(pipe, ctx->vs_col);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
    fb_state.width = dst->width;
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index a2f17073ac..7d8a09edb5 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -43,6 +43,7 @@ struct blitter_context
    /* Private members, really. */
    void *saved_blend_state;   /**< blend state */
    void *saved_dsa_state;     /**< depth stencil alpha state */
+   void *saved_velem_state;   /**< vertex elements state */
    void *saved_rs_state;      /**< rasterizer state */
    void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */
 
@@ -170,6 +171,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
    blitter->saved_dsa_state = state;
 }
 
+static INLINE
+void util_blitter_save_vertex_elements(struct blitter_context *blitter,
+                                       void *state)
+{
+   blitter->saved_velem_state = state;
+}
+
 static INLINE
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c
index 36e9a4941f..8c194102bf 100644
--- a/src/gallium/auxiliary/util/u_draw_quad.c
+++ b/src/gallium/auxiliary/util/u_draw_quad.c
@@ -45,8 +45,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
                         uint num_attribs)
 {
    struct pipe_vertex_buffer vbuffer;
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   uint i;
 
    assert(num_attribs <= PIPE_MAX_ATTRIBS);
 
@@ -58,14 +56,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe,
    vbuffer.max_index = num_verts - 1;
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
-   /* tell pipe about the vertex attributes */
-   for (i = 0; i < num_attribs; i++) {
-      velements[i].src_offset = i * 4 * sizeof(float);
-      velements[i].instance_divisor = 0;
-      velements[i].vertex_buffer_index = 0;
-      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   }
-   pipe->set_vertex_elements(pipe, num_attribs, velements);
+   /* note: the caller must already have set the vertex elements */
 
    /* draw */
    pipe->draw_arrays(pipe, prim_type, 0, num_verts);
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 4e358d3938..db726df9c3 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -61,6 +61,7 @@ struct gen_mipmap_state
    struct pipe_depth_stencil_alpha_state depthstencil;
    struct pipe_rasterizer_state rasterizer;
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element velem[2];
 
    void *vs;
    void *fs2d, *fsCube;
@@ -1307,6 +1308,15 @@ util_create_gen_mipmap(struct pipe_context *pipe,
    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
    ctx->sampler.normalized_coords = 1;
 
+   /* vertex elements state */
+   memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
+   for (i = 0; i < 2; i++) {
+      ctx->velem[i].src_offset = i * 4 * sizeof(float);
+      ctx->velem[i].instance_divisor = 0;
+      ctx->velem[i].vertex_buffer_index = 0;
+      ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* vertex shader - still needed to specify mapping from fragment
     * shader input semantics to vertex elements 
     */
@@ -1499,11 +1509,13 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_save_framebuffer(ctx->cso);
    cso_save_fragment_shader(ctx->cso);
    cso_save_vertex_shader(ctx->cso);
+   cso_save_vertex_elements(ctx->cso);
 
    /* bind our state */
    cso_set_blend(ctx->cso, &ctx->blend);
    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
 
    cso_set_fragment_shader_handle(ctx->cso, fs);
    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
@@ -1578,4 +1590,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
    cso_restore_vertex_shader(ctx->cso);
+   cso_restore_vertex_elements(ctx->cso);
 }
-- 
cgit v1.2.3


From 49d3f662b60be08328e7c1dc381d07f50c384601 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:41:03 +0100
Subject: vl: adapt to new vertex element cso interface

---
 src/gallium/auxiliary/vl/vl_compositor.c         |  29 +++---
 src/gallium/auxiliary/vl/vl_compositor.h         |   2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 118 ++++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   4 +-
 4 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 5d61423f9d..6d461cb880 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -230,6 +230,7 @@ static bool
 init_pipe_state(struct vl_compositor *c)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[2];
 
    assert(c);
 
@@ -251,15 +252,27 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-	
+
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   c->vertex_elems = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-	
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems);
 }
 
 static bool
@@ -314,11 +327,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
 
-   c->vertex_elems[0].src_offset = 0;
-   c->vertex_elems[0].instance_divisor = 0;
-   c->vertex_elems[0].vertex_buffer_index = 0;
-   c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our texcoord buffer and texcoord buffer element
     * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
@@ -343,11 +351,6 @@ init_buffers(struct vl_compositor *c)
 
    pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
 
-   c->vertex_elems[1].src_offset = 0;
-   c->vertex_elems[1].instance_divisor = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
-   c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    /*
     * Create our vertex shader's constant buffer
     * Const buffer contains scaling and translation vectors
@@ -481,7 +484,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
-   compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
+   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 6a9a3fd7af..51755554da 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -43,10 +43,10 @@ struct vl_compositor
    void *sampler;
    void *vertex_shader;
    void *fragment_shader;
+   void *vertex_elems;
    struct pipe_viewport_state viewport;
    struct pipe_scissor_state scissor;
    struct pipe_vertex_buffer vertex_bufs[2];
-   struct pipe_vertex_element vertex_elems[2];
    struct pipe_buffer *vs_const_buf, *fs_const_buf;
 };
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6b9ecd4268..0763b5bb0e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -708,6 +708,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_vertex_element vertex_elems[8];
    unsigned filters[5];
    unsigned i;
 
@@ -771,6 +772,59 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
 
+   /* Position element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Luma, texcoord element */
+   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 0;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cr texcoord element */
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   vertex_elems[2].instance_divisor = 0;
+   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Chroma Cb texcoord element */
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   vertex_elems[3].instance_divisor = 0;
+   vertex_elems[3].vertex_buffer_index = 0;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface top field texcoord element */
+   vertex_elems[4].src_offset = 0;
+   vertex_elems[4].instance_divisor = 0;
+   vertex_elems[4].vertex_buffer_index = 1;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   vertex_elems[5].instance_divisor = 0;
+   vertex_elems[5].vertex_buffer_index = 1;
+   vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface top field texcoord element */
+   vertex_elems[6].src_offset = 0;
+   vertex_elems[6].instance_divisor = 0;
+   vertex_elems[6].vertex_buffer_index = 2;
+   vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Second ref surface bottom field texcoord element */
+   vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   vertex_elems[7].instance_divisor = 0;
+   vertex_elems[7].vertex_buffer_index = 2;
+   vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* need versions with 4, 6 and 8 vertex elems */
+   r->vertex_elems[0] = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
+   r->vertex_elems[1] = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
+   r->vertex_elems[2] = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+
    return true;
 }
 
@@ -783,6 +837,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+   for (i = 0; i < 3; i++)
+      r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems[i]);
 }
 
 static bool
@@ -888,54 +944,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       );
    }
 
-   /* Position element */
-   r->vertex_elems[0].src_offset = 0;
-   r->vertex_elems[0].instance_divisor = 0;
-   r->vertex_elems[0].vertex_buffer_index = 0;
-   r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Luma, texcoord element */
-   r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[1].instance_divisor = 0;
-   r->vertex_elems[1].vertex_buffer_index = 0;
-   r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cr texcoord element */
-   r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
-   r->vertex_elems[2].instance_divisor = 0;
-   r->vertex_elems[2].vertex_buffer_index = 0;
-   r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Chroma Cb texcoord element */
-   r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
-   r->vertex_elems[3].instance_divisor = 0;
-   r->vertex_elems[3].vertex_buffer_index = 0;
-   r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface top field texcoord element */
-   r->vertex_elems[4].src_offset = 0;
-   r->vertex_elems[4].instance_divisor = 0;
-   r->vertex_elems[4].vertex_buffer_index = 1;
-   r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface bottom field texcoord element */
-   r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[5].instance_divisor = 0;
-   r->vertex_elems[5].vertex_buffer_index = 1;
-   r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface top field texcoord element */
-   r->vertex_elems[6].src_offset = 0;
-   r->vertex_elems[6].instance_divisor = 0;
-   r->vertex_elems[6].vertex_buffer_index = 2;
-   r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
-   r->vertex_elems[7].instance_divisor = 0;
-   r->vertex_elems[7].vertex_buffer_index = 2;
-   r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    r->vs_const_buf = pipe_buffer_create
    (
       r->pipe->screen,
@@ -1299,7 +1307,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[0]);
       r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
@@ -1312,7 +1320,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1326,7 +1334,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->past;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1340,7 +1348,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1354,7 +1362,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[1]);
       r->textures.individual.ref[0] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
@@ -1368,7 +1376,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
@@ -1383,7 +1391,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
+      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems[2]);
       r->textures.individual.ref[0] = r->past;
       r->textures.individual.ref[1] = r->future;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f00b8c7b8b..a11a3e7307 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -66,8 +66,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_buffer *vs_const_buf;
    struct pipe_buffer *fs_const_buf;
    struct pipe_framebuffer_state fb_state;
-   struct pipe_vertex_element vertex_elems[8];
-	
+   void *vertex_elems[3];
+
    union
    {
       void *all[5];
-- 
cgit v1.2.3


From 8397c80646364eb20d7e83304b55e6f3ee353b49 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:42:47 +0100
Subject: gallium/docs: document new vertex element cso interface

---
 src/gallium/docs/source/context.rst    |  2 +-
 src/gallium/docs/source/cso/velems.rst | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/docs/source/cso/velems.rst

diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 9080addba4..4608e97adb 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -24,6 +24,7 @@ CSO objects handled by the context object:
 * :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state``
 * :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for
   fragment shaders, and ``*_vs_state`` is for vertex shaders.
+* :ref:`Vertex Elements`: ``*_vertex_elements_state``
 
 
 Resource Binding State
@@ -60,7 +61,6 @@ objects. They all follow simple, one-method binding calls, e.g.
   not have the scissor test enabled, then the scissor bounds never need to
   be set since they will not be used.
 * ``set_viewport_state``
-* ``set_vertex_elements``
 
 
 Clearing
diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst
new file mode 100644
index 0000000000..8e758fae10
--- /dev/null
+++ b/src/gallium/docs/source/cso/velems.rst
@@ -0,0 +1,24 @@
+.. _vertex elements:
+
+Vertex Elements
+===============
+
+This state controls format etc. of the input attributes contained
+in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member
+for each input attribute.
+
+Members
+-------
+
+src_offset
+    The byte offset of the attribute in the buffer given by
+    vertex_buffer_index for the first vertex.
+instance_divisor
+    The instance data rate divisor, used for instancing.
+    0 means this is per-vertex data, n means per-instance data used for
+    n consecutive instances (n > 0).
+vertex_buffer_index
+    The vertex buffer this attribute lives in. Several attributes may
+    live in the same vertex buffer.
+src_format
+    The format of the attribute data. One of the PIPE_FORMAT tokens.
-- 
cgit v1.2.3


From 711476be2904bd16d45b424236d6aefa9a9e6bc9 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:43:58 +0100
Subject: xorg: adapt to new vertex element cso

---
 src/gallium/state_trackers/xorg/xorg_renderer.c | 13 +++++++++++++
 src/gallium/state_trackers/xorg/xorg_renderer.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 83b0d31e38..1eb926360b 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -68,6 +68,8 @@ renderer_draw(struct xorg_renderer *r)
 
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, r->attrs_per_vertex, r->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_QUADS,
                               num_verts,  /* verts */
@@ -92,6 +94,7 @@ renderer_init_state(struct xorg_renderer *r)
 {
    struct pipe_depth_stencil_alpha_state dsa;
    struct pipe_rasterizer_state raster;
+   unsigned i;
 
    /* set common initial clip state */
    memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state));
@@ -103,6 +106,14 @@ renderer_init_state(struct xorg_renderer *r)
    raster.gl_rasterization_rules = 1;
    cso_set_rasterizer(r->cso, &raster);
 
+   /* vertex elements state */
+   memset(&r->velems[0], 0, sizeof(r->velems[0]) * 3);
+   for (i = 0; i < 3; i++) {
+      r->velems[i].src_offset = i * 4 * sizeof(float);
+      r->velems[i].instance_divisor = 0;
+      r->velems[i].vertex_buffer_index = 0;
+      r->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
 }
 
 
@@ -600,6 +611,8 @@ void renderer_draw_yuv(struct xorg_renderer *r,
    if (buf) {
       const int num_attribs = 2; /*pos + tex coord*/
 
+      cso_set_vertex_elements(r->cso, num_attribs, r->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_QUADS,
                               4,  /* verts */
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h
index af6aa0567d..3d00628719 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.h
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.h
@@ -28,6 +28,7 @@ struct xorg_renderer {
 
    float buffer[BUF_SIZE];
    int buffer_size;
+   struct pipe_vertex_element velems[3];
 
    /* number of attributes per vertex for the current
     * draw operation */
-- 
cgit v1.2.3


From 1f57069c68f7bf812d4d2e054c5ced3ed72cfa10 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:45:25 +0100
Subject: vega: adapt to new vertex element cso

---
 src/gallium/state_trackers/vega/api_masks.c  | 2 ++
 src/gallium/state_trackers/vega/polygon.c    | 3 ++-
 src/gallium/state_trackers/vega/renderer.c   | 5 +++++
 src/gallium/state_trackers/vega/vg_context.c | 8 ++++++++
 src/gallium/state_trackers/vega/vg_context.h | 1 +
 5 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c
index 9c123a4cf9..7eb5ea1f07 100644
--- a/src/gallium/state_trackers/vega/api_masks.c
+++ b/src/gallium/state_trackers/vega/api_masks.c
@@ -86,6 +86,8 @@ draw_clear_quad(struct vg_context *st,
 
    /* draw */
    if (buf) {
+      cso_set_vertex_elements(st->cso_context, 2, st->velems);
+
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c
index 96fbbfc85a..eef2c1eb87 100644
--- a/src/gallium/state_trackers/vega/polygon.c
+++ b/src/gallium/state_trackers/vega/polygon.c
@@ -292,11 +292,12 @@ static void draw_polygon(struct vg_context *ctx,
    pipe->set_vertex_buffers(pipe, 1, &vbuffer);
 
    /* tell pipe about the vertex attributes */
+   memset(&velement, 0, sizeof(velement));
    velement.src_offset = 0;
    velement.instance_divisor = 0;
    velement.vertex_buffer_index = 0;
    velement.src_format = PIPE_FORMAT_R32G32_FLOAT;
-   pipe->set_vertex_elements(pipe, 1, &velement);
+   cso_set_vertex_elements(ctx->cso_context, 1, &velement);
 
    /* draw */
    pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 05620efa9c..47e8b470a1 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -210,6 +210,7 @@ void renderer_draw_quad(struct renderer *r,
    buf = setup_vertex_data(r, x1, y1, x2, y2, depth);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(r->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -248,6 +249,7 @@ void renderer_draw_texture(struct renderer *r,
                                s0, t0, s1, t1, 0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(pipe, buf, 0,
                            PIPE_PRIM_TRIANGLE_FAN,
                            4,  /* verts */
@@ -370,6 +372,7 @@ void renderer_copy_texture(struct renderer *ctx,
                          0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
       util_draw_vertex_buffer(ctx->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -535,6 +538,7 @@ void renderer_copy_surface(struct renderer *ctx,
                            (float) dstX1, (float) dstY1, z);
 
    if (buf) {
+      cso_set_vertex_elements(ctx->cso, 2, ctx->owner->velems);
       util_draw_vertex_buffer(ctx->pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
@@ -587,6 +591,7 @@ void renderer_texture_quad(struct renderer *r,
                           s0, t0, s1, t1, 0.0f);
 
    if (buf) {
+      cso_set_vertex_elements(r->cso, 2, r->owner->velems);
       util_draw_vertex_buffer(pipe, buf, 0,
                               PIPE_PRIM_TRIANGLE_FAN,
                               4,  /* verts */
diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c
index 426bf9bc62..170391ec03 100644
--- a/src/gallium/state_trackers/vega/vg_context.c
+++ b/src/gallium/state_trackers/vega/vg_context.c
@@ -72,6 +72,7 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
                                       struct vg_context *share)
 {
    struct vg_context *ctx;
+   unsigned i;
 
    ctx = CALLOC_STRUCT(vg_context);
 
@@ -103,6 +104,13 @@ struct vg_context * vg_create_context(struct pipe_context *pipe,
    ctx->blend_sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
    ctx->blend_sampler.normalized_coords = 0;
 
+   for (i = 0; i < 2; i++) {
+      ctx->velems[i].src_offset = i * 4 * sizeof(float);
+      ctx->velems[i].instance_divisor = 0;
+      ctx->velems[i].vertex_buffer_index = 0;
+      ctx->velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    vg_set_error(ctx, VG_NO_ERROR);
 
    ctx->owned_objects[VG_OBJECT_PAINT] = cso_hash_create();
diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h
index bc88c8d139..804e9e76d7 100644
--- a/src/gallium/state_trackers/vega/vg_context.h
+++ b/src/gallium/state_trackers/vega/vg_context.h
@@ -146,6 +146,7 @@ struct vg_context
    struct vg_shader *clear_vs;
    struct vg_shader *texture_vs;
    struct pipe_buffer *vs_const_buffer;
+   struct pipe_vertex_element velems[2];
 };
 
 struct vg_object {
-- 
cgit v1.2.3


From 9abef5ebfa6be3f8d39fbc8b92293c2c0c81ce46 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:45:46 +0100
Subject: vega: adapt to new vertex element cso

---
 src/gallium/state_trackers/python/p_context.i | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 3f36ccb621..5c44462e80 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -51,7 +51,7 @@ struct st_context {
    void set_blend( const struct pipe_blend_state *state ) {
       cso_set_blend($self->cso, state);
    }
-   
+
    void set_fragment_sampler( unsigned index, const struct pipe_sampler_state *state ) {
       cso_single_sampler($self->cso, index, state);
       cso_single_sampler_done($self->cso);
@@ -222,9 +222,9 @@ struct st_context {
    void set_vertex_elements(unsigned num) 
    {
       $self->num_vertex_elements = num;
-      $self->pipe->set_vertex_elements($self->pipe, 
-                                       $self->num_vertex_elements, 
-                                       $self->vertex_elements);
+      cso_set_vertex_elements($self->cso,
+                              $self->num_vertex_elements, 
+                              $self->vertex_elements);
    }
 
    /*
-- 
cgit v1.2.3


From 618e7e0c88400dc853b0ff4792a0c70800587073 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:45:57 +0100
Subject: trace: adapt to new vertex element cso

---
 src/gallium/drivers/trace/tr_context.c | 91 +++++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index df40fbade6..133521f45e 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -773,6 +773,70 @@ trace_context_delete_vs_state(struct pipe_context *_pipe,
 }
 
 
+static INLINE void *
+trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
+                                           unsigned num_elements,
+                                           const struct  pipe_vertex_element *elements)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+   void * result;
+
+   trace_dump_call_begin("pipe_context", "create_vertex_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(uint, num_elements);
+
+   trace_dump_arg_begin("elements");
+   trace_dump_struct_array(vertex_element, elements, num_elements);
+   trace_dump_arg_end();
+
+   result = pipe->create_vertex_elements_state(pipe, num_elements, elements);
+
+   trace_dump_ret(ptr, result);
+
+   trace_dump_call_end();
+
+   return result;
+}
+
+
+static INLINE void
+trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                         void *state)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+
+   trace_dump_call_begin("pipe_context", "bind_vertex_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(ptr, state);
+
+   pipe->bind_vertex_elements_state(pipe, state);
+
+   trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                           void *state)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+   /* Dump the call under the exact pipe_context method name. */
+   trace_dump_call_begin("pipe_context", "delete_vertex_elements_state");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(ptr, state);
+
+   pipe->delete_vertex_elements_state(pipe, state);
+
+   trace_dump_call_end();
+}
+
+
 static INLINE void
 trace_context_set_blend_color(struct pipe_context *_pipe,
                               const struct pipe_blend_color *state)
@@ -1047,29 +1111,6 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
 }
 
 
-static INLINE void
-trace_context_set_vertex_elements(struct pipe_context *_pipe,
-                                  unsigned num_elements,
-                                  const struct pipe_vertex_element *elements)
-{
-   struct trace_context *tr_ctx = trace_context(_pipe);
-   struct pipe_context *pipe = tr_ctx->pipe;
-
-   trace_dump_call_begin("pipe_context", "set_vertex_elements");
-
-   trace_dump_arg(ptr, pipe);
-   trace_dump_arg(uint, num_elements);
-
-   trace_dump_arg_begin("elements");
-   trace_dump_struct_array(vertex_element, elements, num_elements);
-   trace_dump_arg_end();
-
-   pipe->set_vertex_elements(pipe, num_elements, elements);
-
-   trace_dump_call_end();
-}
-
-
 static INLINE void
 trace_context_surface_copy(struct pipe_context *_pipe,
                            struct pipe_surface *dest,
@@ -1303,6 +1344,9 @@ trace_context_create(struct trace_screen *tr_scr,
    tr_ctx->base.create_vs_state = trace_context_create_vs_state;
    tr_ctx->base.bind_vs_state = trace_context_bind_vs_state;
    tr_ctx->base.delete_vs_state = trace_context_delete_vs_state;
+   tr_ctx->base.create_vertex_elements_state = trace_context_create_vertex_elements_state;
+   tr_ctx->base.bind_vertex_elements_state = trace_context_bind_vertex_elements_state;
+   tr_ctx->base.delete_vertex_elements_state = trace_context_delete_vertex_elements_state;
    tr_ctx->base.set_blend_color = trace_context_set_blend_color;
    tr_ctx->base.set_stencil_ref = trace_context_set_stencil_ref;
    tr_ctx->base.set_clip_state = trace_context_set_clip_state;
@@ -1314,7 +1358,6 @@ trace_context_create(struct trace_screen *tr_scr,
    tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures;
    tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures;
    tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
-   tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
    if (pipe->surface_copy)
       tr_ctx->base.surface_copy = trace_context_surface_copy;
    if (pipe->surface_fill)
-- 
cgit v1.2.3


From b1676a953eb61838e9ef03532792bc723f4d4ada Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:46:12 +0100
Subject: svga: adapt to new vertex element cso

---
 src/gallium/drivers/svga/svga_context.h          |  8 +++--
 src/gallium/drivers/svga/svga_pipe_vertex.c      | 38 ++++++++++++++++++------
 src/gallium/drivers/svga/svga_state_need_swtnl.c |  4 +--
 src/gallium/drivers/svga/svga_state_vdecl.c      |  8 ++---
 src/gallium/drivers/svga/svga_state_vs.c         |  4 +--
 src/gallium/drivers/svga/svga_swtnl_state.c      |  4 +--
 6 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 03302e2a6e..4d9f00991a 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -169,6 +169,11 @@ struct svga_sampler_state {
    unsigned view_max_lod;
 };
 
+struct svga_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[];
+};
+
 /* Use to calculate differences between state emitted to hardware and
  * current driver-calculated state.  
  */
@@ -178,13 +183,13 @@ struct svga_state
    const struct svga_depth_stencil_state *depth;
    const struct svga_rasterizer_state *rast;
    const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   const struct svga_velems_state *velems;
 
    struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
    struct svga_fragment_shader *fs;
    struct svga_vertex_shader *vs;
 
    struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
    struct pipe_buffer *cb[PIPE_SHADER_TYPES];
 
    struct pipe_framebuffer_state framebuffer;
@@ -204,7 +209,6 @@ struct svga_state
 
    unsigned num_samplers;
    unsigned num_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
    unsigned reduced_prim;
 
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index ffc0f99565..979deb12af 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -26,6 +26,7 @@
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "svga_screen.h"
@@ -64,20 +65,37 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe,
    svga->dirty |= SVGA_NEW_VBUFFER;
 }
 
-static void svga_set_vertex_elements(struct pipe_context *pipe,
-                                     unsigned count,
-                                     const struct pipe_vertex_element *elements)
+
+static void *
+svga_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
 {
-   struct svga_context *svga = svga_context(pipe);
-   unsigned i;
+   struct svga_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state) + count * sizeof(*attribs));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
 
-   for (i = 0; i < count; i++)
-      svga->curr.ve[i] = elements[i];
+static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
+                                            void *velems)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
 
-   svga->curr.num_vertex_elements = count;
+   svga->curr.velems = svga_velems;
    svga->dirty |= SVGA_NEW_VELEMENT;
 }
 
+static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
+                                              void *velems)
+{
+   FREE(velems);
+}
 
 void svga_cleanup_vertex_state( struct svga_context *svga )
 {
@@ -91,7 +109,9 @@ void svga_cleanup_vertex_state( struct svga_context *svga )
 void svga_init_vertex_functions( struct svga_context *svga )
 {
    svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
-   svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+   svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state;
+   svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
+   svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index dd13a89d24..94fe0bc968 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -76,8 +76,8 @@ static int update_need_swvfetch( struct svga_context *svga,
    unsigned i;
    boolean need_swvfetch = FALSE;
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
-      svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format);
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format);
       if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
          need_swvfetch = TRUE;
          break;
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index d1066ce13b..91854f6530 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -98,17 +98,17 @@ upload_user_buffers( struct svga_context *svga )
 static int emit_hw_vs_vdecl( struct svga_context *svga,
                              unsigned dirty )
 {
-   const struct pipe_vertex_element *ve = svga->curr.ve;
+   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
    SVGA3dVertexDecl decl;
    unsigned i;
 
-   assert(svga->curr.num_vertex_elements >=
+   assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
 
    svga_hwtnl_reset_vdecl( svga->hwtnl, 
-                           svga->curr.num_vertex_elements );
+                           svga->curr.velems->count );
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+   for (i = 0; i < svga->curr.velems->count; i++) {
       const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
 
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index d7999fe53d..781f7bf533 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -186,8 +186,8 @@ static int update_zero_stride( struct svga_context *svga,
    svga->curr.zero_stride_vertex_elements = 0;
    svga->curr.num_zero_stride_vertex_elements = 0;
 
-   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
-      const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      const struct pipe_vertex_element *vel = &svga->curr.velems->velem[i];
       const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
          vel->vertex_buffer_index];
       if (vbuffer->stride == 0) {
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 35f36a828f..246d34e649 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -99,8 +99,8 @@ static int update_swtnl_draw( struct svga_context *svga,
 
    if (dirty & SVGA_NEW_VELEMENT)
       draw_set_vertex_elements(svga->swtnl.draw, 
-                               svga->curr.num_vertex_elements, 
-                               svga->curr.ve );
+                               svga->curr.velems->count, 
+                               svga->curr.velems->velem );
 
    if (dirty & SVGA_NEW_CLIP)
       draw_set_clip_state(svga->swtnl.draw, 
-- 
cgit v1.2.3


From 057427dd3ea9da6f907b39391f6bd1b093fb128e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:46:29 +0100
Subject: softpipe: adapt to new vertex element cso

---
 src/gallium/drivers/softpipe/sp_context.c      |  5 +++-
 src/gallium/drivers/softpipe/sp_context.h      |  4 +--
 src/gallium/drivers/softpipe/sp_state.h        | 17 +++++++++----
 src/gallium/drivers/softpipe/sp_state_vertex.c | 35 +++++++++++++++++++-------
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index ddc35bcd62..e8f2318e48 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -245,6 +245,10 @@ softpipe_create_context( struct pipe_screen *screen,
    softpipe->pipe.bind_gs_state   = softpipe_bind_gs_state;
    softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
 
+   softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state;
+   softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state;
+   softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state;
+
    softpipe->pipe.set_blend_color = softpipe_set_blend_color;
    softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref;
    softpipe->pipe.set_clip_state = softpipe_set_clip_state;
@@ -257,7 +261,6 @@ softpipe_create_context( struct pipe_screen *screen,
    softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
 
    softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
-   softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements;
 
    softpipe->pipe.draw_arrays = softpipe_draw_arrays;
    softpipe->pipe.draw_elements = softpipe_draw_elements;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 95def72c54..396d46d4ba 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -45,6 +45,7 @@ struct softpipe_tile_cache;
 struct softpipe_tex_tile_cache;
 struct sp_fragment_shader;
 struct sp_vertex_shader;
+struct sp_velems_state;
 
 
 struct softpipe_context {
@@ -59,6 +60,7 @@ struct softpipe_context {
    struct sp_fragment_shader *fs;
    struct sp_vertex_shader *vs;
    struct sp_geometry_shader *gs;
+   struct sp_velems_state *velems;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -72,13 +74,11 @@ struct softpipe_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_samplers;
    unsigned num_vertex_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    unsigned dirty; /**< Mask of SP_NEW_x flags */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 4370bbeaee..a6b9a841fe 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -100,6 +100,11 @@ struct sp_geometry_shader {
    struct draw_geometry_shader *draw_data;
 };
 
+struct sp_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velem[];
+};
+
 
 void *
 softpipe_create_blend_state(struct pipe_context *,
@@ -160,8 +165,14 @@ void *softpipe_create_gs_state(struct pipe_context *,
 void softpipe_bind_gs_state(struct pipe_context *, void *);
 void softpipe_delete_gs_state(struct pipe_context *, void *);
 
+void *softpipe_create_vertex_elements_state(struct pipe_context *,
+                                            unsigned count,
+                                            const struct pipe_vertex_element *);
+void softpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void softpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
 void softpipe_set_polygon_stipple( struct pipe_context *,
-				  const struct pipe_poly_stipple * );
+                                   const struct pipe_poly_stipple * );
 
 void softpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
@@ -178,10 +189,6 @@ softpipe_set_vertex_sampler_textures(struct pipe_context *,
 void softpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
-void softpipe_set_vertex_elements(struct pipe_context *,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *);
-
 void softpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index b491d92ed1..e7dc3d002b 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -32,27 +32,44 @@
 #include "sp_context.h"
 #include "sp_state.h"
 
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 
 
+void *
+softpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct sp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state) + count * sizeof(*attribs));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
 void
-softpipe_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *attribs)
+softpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_velems_state *sp_velems = (struct sp_velems_state *) velems;
 
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   memcpy(softpipe->vertex_element, attribs,
-          count * sizeof(struct pipe_vertex_element));
-   softpipe->num_vertex_elements = count;
+   softpipe->velems = sp_velems;
 
    softpipe->dirty |= SP_NEW_VERTEX;
 
-   draw_set_vertex_elements(softpipe->draw, count, attribs);
+   draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem);
 }
 
+void
+softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 softpipe_set_vertex_buffers(struct pipe_context *pipe,
-- 
cgit v1.2.3


From f89730385532056e89e3b9053c244a67f84e323e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:46:39 +0100
Subject: llvmpipe: adapt to new vertex element cso

---
 src/gallium/drivers/llvmpipe/lp_context.c      |  5 +++-
 src/gallium/drivers/llvmpipe/lp_context.h      |  4 +--
 src/gallium/drivers/llvmpipe/lp_state.h        | 16 ++++++++----
 src/gallium/drivers/llvmpipe/lp_state_vertex.c | 34 +++++++++++++++++++-------
 4 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 9120226de0..ccd1cf8eec 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -145,6 +145,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.bind_vs_state   = llvmpipe_bind_vs_state;
    llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
 
+   llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+   llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+   llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
    llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
    llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
    llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
@@ -157,7 +161,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
-   llvmpipe->pipe.set_vertex_elements = llvmpipe_set_vertex_elements;
 
    llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
    llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 955c7eb8e0..217ec59b68 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -46,6 +46,7 @@ struct lp_fragment_shader;
 struct lp_vertex_shader;
 struct lp_blend_state;
 struct setup_context;
+struct lp_velems_state;
 
 struct llvmpipe_context {
    struct pipe_context pipe;  /**< base class */
@@ -58,6 +59,7 @@ struct llvmpipe_context {
    const struct pipe_rasterizer_state *rasterizer;
    struct lp_fragment_shader *fs;
    const struct lp_vertex_shader *vs;
+   const struct lp_velems_state *velems;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -71,13 +73,11 @@ struct llvmpipe_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_samplers;
    unsigned num_vertex_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    unsigned dirty; /**< Mask of LP_NEW_x flags */
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 9beba32271..57f5bd0042 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -119,6 +119,10 @@ struct lp_vertex_shader {
    struct draw_vertex_shader *draw_data;
 };
 
+struct lp_velems_state {                 /* llvmpipe vertex-elements state object */
+   unsigned count;                       /**< number of entries in velem[] */
+   struct pipe_vertex_element velem[];   /**< elements copied at create time */
+};
 
 
 void *
@@ -176,8 +180,14 @@ void *llvmpipe_create_vs_state(struct pipe_context *,
 void llvmpipe_bind_vs_state(struct pipe_context *, void *);
 void llvmpipe_delete_vs_state(struct pipe_context *, void *);
 
+void *llvmpipe_create_vertex_elements_state(struct pipe_context *,
+                                            unsigned count,
+                                            const struct pipe_vertex_element *);
+void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *);
+void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *);
+
 void llvmpipe_set_polygon_stipple( struct pipe_context *,
-				  const struct pipe_poly_stipple * );
+                                   const struct pipe_poly_stipple * );
 
 void llvmpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
@@ -194,10 +204,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
-void llvmpipe_set_vertex_elements(struct pipe_context *,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *);
-
 void llvmpipe_set_vertex_buffers(struct pipe_context *,
                                  unsigned count,
                                  const struct pipe_vertex_buffer *);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index 57ac25ea0c..5a9b6d5e18 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -35,24 +35,40 @@
 #include "draw/draw_context.h"
 
 
+void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct lp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state) + count * sizeof(*attribs));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
 void
-llvmpipe_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *attribs)
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
 
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   memcpy(llvmpipe->vertex_element, attribs,
-          count * sizeof(struct pipe_vertex_element));
-   llvmpipe->num_vertex_elements = count;
+   llvmpipe->velems = lp_velems;
 
    llvmpipe->dirty |= LP_NEW_VERTEX;
 
-   draw_set_vertex_elements(llvmpipe->draw, count, attribs);
+   draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
 }
 
+void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
-- 
cgit v1.2.3


From e9441e1f88cc07365f4d7d9149ccefe128809645 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:46:49 +0100
Subject: r300g: adapt to new vertex element cso

---
 src/gallium/drivers/r300/r300_blit.c          |  1 +
 src/gallium/drivers/r300/r300_context.h       |  8 +++--
 src/gallium/drivers/r300/r300_emit.c          |  4 +--
 src/gallium/drivers/r300/r300_render.c        | 12 ++++----
 src/gallium/drivers/r300/r300_state.c         | 44 +++++++++++++++++++--------
 src/gallium/drivers/r300/r300_state_derived.c |  6 ++--
 6 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index eb9b0beeb5..2f9650df1b 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -34,6 +34,7 @@ static void r300_blitter_save_states(struct r300_context* r300)
     util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
     util_blitter_save_fragment_shader(r300->blitter, r300->fs);
     util_blitter_save_vertex_shader(r300->blitter, r300->vs);
+    util_blitter_save_vertex_elements(r300->blitter, r300->velems);
 }
 
 /* Clear currently bound buffers. */
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 1eba8a8ed1..0ee0ab47a6 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -247,6 +247,11 @@ struct r300_vertex_info {
     uint32_t vap_prog_stream_cntl_ext[8];
 };
 
+struct r300_velems_state {               /* r300 vertex-elements state object */
+    unsigned count;                      /**< number of entries in velem[] */
+    struct pipe_vertex_element velem[];  /**< elements copied at create time */
+};
+
 extern struct pipe_viewport_state r300_viewport_identity;
 
 struct r300_context {
@@ -317,8 +322,7 @@ struct r300_context {
     struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
     int vertex_buffer_count;
     /* Vertex elements for Gallium. */
-    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-    int vertex_element_count;
+    struct r300_velems_state *velems;
 
     struct pipe_stencil_ref stencil_ref;
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 88fe166359..fa3df9a23b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -776,9 +776,9 @@ void r300_emit_texture(struct r300_context* r300,
 void r300_emit_aos(struct r300_context* r300, unsigned offset)
 {
     struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
-    unsigned size1, size2, aos_count = r300->vertex_element_count;
+    unsigned size1, size2, aos_count = r300->velems->count;
     unsigned packet_size = (aos_count * 3 + 1) / 2;
     CS_LOCALS(r300);
 
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index cd4971ae13..1850a6f247 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -128,7 +128,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 {
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
-    unsigned vertex_element_count = r300->vertex_element_count;
+    unsigned vertex_element_count = r300->velems->count;
     unsigned i, v, vbi, dw, elem_offset;
 
     /* Size of the vertex, in dwords. */
@@ -151,7 +151,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        velem = &r300->vertex_element[i];
+        velem = &r300->velems->velem[i];
         offset[i] = velem->src_offset / 4;
         size[i] = util_format_get_blocksize(velem->src_format) / 4;
         vertex_size += size[i];
@@ -183,7 +183,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     /* Emit vertices. */
     for (v = 0; v < count; v++) {
         for (i = 0; i < vertex_element_count; i++) {
-            velem = &r300->vertex_element[i];
+            velem = &r300->velems->velem[i];
             vbi = velem->vertex_buffer_index;
             elem_offset = offset[i] + stride[vbi] * (v + start);
 
@@ -196,7 +196,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
 
     /* Unmap buffers. */
     for (i = 0; i < vertex_element_count; i++) {
-        vbi = r300->vertex_element[i].vertex_buffer_index;
+        vbi = r300->velems->velem[i].vertex_buffer_index;
 
         if (map[vbi]) {
             vbuf = &r300->vertex_buffer[vbi];
@@ -278,11 +278,11 @@ static void r300_emit_draw_elements(struct r300_context *r300,
 static boolean r300_setup_vertex_buffers(struct r300_context *r300)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     struct pipe_buffer *pbuf;
 
 validate:
-    for (int i = 0; i < r300->vertex_element_count; i++) {
+    for (int i = 0; i < r300->velems->count; i++) {
         pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
 
         if (!r300->winsys->add_buffer(r300->winsys, pbuf,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 34bf81c193..ceac690fc4 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1044,11 +1044,11 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
 static boolean r300_validate_aos(struct r300_context *r300)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     int i;
 
     /* Check if formats and strides are aligned to the size of DWORD. */
-    for (i = 0; i < r300->vertex_element_count; i++) {
+    for (i = 0; i < r300->velems->count; i++) {
         if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
             util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
             return FALSE;
@@ -1057,20 +1057,32 @@ static boolean r300_validate_aos(struct r300_context *r300)
     return TRUE;
 }
 
-static void r300_set_vertex_elements(struct pipe_context* pipe,
-                                    unsigned count,
-                                    const struct pipe_vertex_element* elements)
+static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element* attribs)
 {
-    struct r300_context* r300 = r300_context(pipe);
+    /* XXX: could precalculate state here instead of later. */
+    struct r300_velems_state *velems;
+    assert(count <= PIPE_MAX_ATTRIBS);
+    velems = (struct r300_velems_state *) MALLOC(sizeof(struct r300_velems_state) + count * sizeof(*attribs));
+    if (velems) {
+        velems->count = count;
+        memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+    }
+    return velems;
+}
+
+static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
+                                            void *velems)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_velems_state *r300_velems = (struct r300_velems_state *) velems;
 
-    memcpy(r300->vertex_element,
-           elements,
-           sizeof(struct pipe_vertex_element) * count);
-    r300->vertex_element_count = count;
+    r300->velems = r300_velems;
 
     if (r300->draw) {
         draw_flush(r300->draw);
-        draw_set_vertex_elements(r300->draw, count, elements);
+        draw_set_vertex_elements(r300->draw, r300_velems->count, r300_velems->velem);
     }
 
     if (!r300_validate_aos(r300)) {
@@ -1080,6 +1092,11 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
     }
 }
 
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+    FREE(velems);
+}
+
 static void* r300_create_vs_state(struct pipe_context* pipe,
                                   const struct pipe_shader_state* shader)
 {
@@ -1241,7 +1258,10 @@ void r300_init_state_functions(struct r300_context* r300)
     r300->context.set_viewport_state = r300_set_viewport_state;
 
     r300->context.set_vertex_buffers = r300_set_vertex_buffers;
-    r300->context.set_vertex_elements = r300_set_vertex_elements;
+
+    r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
+    r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
+    r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
 
     r300->context.create_vs_state = r300_create_vs_state;
     r300->context.bind_vs_state = r300_bind_vs_state;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 2cbce9210a..c43a93601c 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -128,10 +128,10 @@ static void r300_vertex_psc(struct r300_context* r300)
     DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
             " in psc\n",
             r300->vs->info.num_inputs,
-            r300->vertex_element_count);
+            r300->velems->count);
 
-    for (i = 0; i < r300->vertex_element_count; i++) {
-        format = r300->vertex_element[i].src_format;
+    for (i = 0; i < r300->velems->count; i++) {
+        format = r300->velems->velem[i].src_format;
 
         type = r300_translate_vertex_data_type(format) |
             (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
-- 
cgit v1.2.3


From 4dfe2df6296745e054db6c83cdd122417a3e3764 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:47:04 +0100
Subject: cell: adapt to new vertex element cso

---
 src/gallium/drivers/cell/ppu/cell_context.h      |  8 +++--
 src/gallium/drivers/cell/ppu/cell_state_vertex.c | 41 ++++++++++++++++++------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index a77cc5b906..84ad0f3422 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -93,6 +93,11 @@ struct cell_buffer_list
    struct cell_buffer_node *head;
 };
 
+struct cell_velems_state
+{
+   unsigned count;                       /**< number of entries in velem[] */
+   struct pipe_vertex_element velem[];   /**< elements copied at create time */
+};
 
 /**
  * Per-context state, subclass of pipe_context.
@@ -110,6 +115,7 @@ struct cell_context
    const struct pipe_rasterizer_state *rasterizer;
    const struct cell_vertex_shader_state *vs;
    const struct cell_fragment_shader_state *fs;
+   const struct cell_velems_state *velems;
 
    struct spe_function logic_op;
 
@@ -125,8 +131,6 @@ struct cell_context
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    uint num_vertex_buffers;
-   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-   uint num_vertex_elements;
 
    ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
    ubyte *zsbuf_map;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index fbe55c8472..35c919fb6b 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -32,24 +32,43 @@
 #include "cell_context.h"
 #include "cell_state.h"
 
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 
 
-static void
-cell_set_vertex_elements(struct pipe_context *pipe,
-                         unsigned count,
-                         const struct pipe_vertex_element *elements)
+static void *
+cell_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
 {
-   struct cell_context *cell = cell_context(pipe);
-
+   struct cell_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state) + count * sizeof(*attribs));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+static void
+cell_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
+{
+   struct cell_context *cell = cell_context(pipe);
+   struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
 
-   memcpy(cell->vertex_element, elements, count * sizeof(elements[0]));
-   cell->num_vertex_elements = count;
+   cell->velems = cell_velems;
 
    cell->dirty |= CELL_NEW_VERTEX;
 
-   draw_set_vertex_elements(cell->draw, count, elements);
+   draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
+}
+
+static void
+cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
 }
 
 
@@ -75,5 +94,7 @@ void
 cell_init_vertex_functions(struct cell_context *cell)
 {
    cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
-   cell->pipe.set_vertex_elements = cell_set_vertex_elements;
+   cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
+   cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
+   cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
 }
-- 
cgit v1.2.3


From 8e2df0dcb92b7b092b35df3d35591c31d5f2ca5f Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:47:17 +0100
Subject: i915g: adapt to new vertex element cso

---
 src/gallium/drivers/i915/i915_context.h |  8 ++++++--
 src/gallium/drivers/i915/i915_state.c   | 35 +++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index da769e7b29..369c63eece 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -148,7 +148,7 @@ struct i915_state
 
    /** Describes the current hardware vertex layout */
    struct vertex_info vertex_info;
-   
+
    unsigned id;			/* track lost context events */
 };
 
@@ -187,6 +187,11 @@ struct i915_sampler_state {
    unsigned maxlod;
 };
 
+struct i915_velems_state {               /* i915 vertex-elements state object */
+   unsigned count;                       /**< number of entries in velem[] */
+   struct pipe_vertex_element velem[];   /**< elements copied at create time */
+};
+
 struct i915_texture {
    struct pipe_texture base;
 
@@ -247,7 +252,6 @@ struct i915_context
 
    unsigned num_samplers;
    unsigned num_textures;
-   unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
    struct intel_batchbuffer *batch;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 62169918e2..46406065c3 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -742,21 +742,42 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
    draw_set_vertex_buffers(i915->draw, count, buffers);
 }
 
-static void i915_set_vertex_elements(struct pipe_context *pipe,
-                                     unsigned count,
-                                     const struct pipe_vertex_element *elements)
+static void *
+i915_create_vertex_elements_state(struct pipe_context *pipe,
+                                  unsigned count,
+                                  const struct pipe_vertex_element *attribs)
+{
+   struct i915_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state) + count * sizeof(*attribs));
+   if (velems) {
+      velems->count = count;
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+static void
+i915_bind_vertex_elements_state(struct pipe_context *pipe,
+                                void *velems)
 {
    struct i915_context *i915 = i915_context(pipe);
+   struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
+
    /* Because we change state before the draw_set_vertex_buffers call
     * we need a flush here, just to be sure.
     */
    draw_flush(i915->draw);
 
-   i915->num_vertex_elements = count;
    /* pass-through to draw module */
-   draw_set_vertex_elements(i915->draw, count, elements);
+   draw_set_vertex_elements(i915->draw, i915_velems->count, i915_velems->velem);
 }
 
+static void
+i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
 
 void
 i915_init_state_functions( struct i915_context *i915 )
@@ -782,6 +803,9 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.create_vs_state = i915_create_vs_state;
    i915->base.bind_vs_state = i915_bind_vs_state;
    i915->base.delete_vs_state = i915_delete_vs_state;
+   i915->base.create_vertex_elements_state = i915_create_vertex_elements_state;
+   i915->base.bind_vertex_elements_state = i915_bind_vertex_elements_state;
+   i915->base.delete_vertex_elements_state = i915_delete_vertex_elements_state;
 
    i915->base.set_blend_color = i915_set_blend_color;
    i915->base.set_stencil_ref = i915_set_stencil_ref;
@@ -794,5 +818,4 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
    i915->base.set_viewport_state = i915_set_viewport_state;
    i915->base.set_vertex_buffers = i915_set_vertex_buffers;
-   i915->base.set_vertex_elements = i915_set_vertex_elements;
 }
-- 
cgit v1.2.3


From 470dbb84b8e892bf6d5a9136b40dd68f5b146a05 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:47:28 +0100
Subject: i965g: adapt to new vertex element cso

---
 src/gallium/drivers/i965/brw_context.h     |   5 +-
 src/gallium/drivers/i965/brw_draw_upload.c | 211 +-----------------------
 src/gallium/drivers/i965/brw_pipe_vertex.c | 247 ++++++++++++++++++++++++++++-
 src/gallium/drivers/i965/brw_structs.h     |   4 +-
 4 files changed, 251 insertions(+), 216 deletions(-)

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 12cfa7b049..f5b1a06576 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -351,7 +351,7 @@ struct brw_vs_prog_data {
 
 /* Size == 0 if output either not written, or always [0,0,0,1]
  */
-struct brw_vs_ouput_sizes {
+struct brw_vs_output_sizes {
    GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
 };
 
@@ -546,14 +546,13 @@ struct brw_context
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
+      const struct brw_vertex_element_packet *velems;
 
       const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_samplers;
 
       struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_elements;
       unsigned num_textures;
       unsigned num_vertex_buffers;
 
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 9eafdd4085..0820ba20a0 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -30,7 +30,6 @@
 
 #include "util/u_upload_mgr.h"
 #include "util/u_math.h"
-#include "util/u_format.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
@@ -43,141 +42,6 @@
 
 
-static unsigned brw_translate_surface_format( unsigned id )
-{
-   switch (id) {
-   case PIPE_FORMAT_R64_FLOAT:
-      return BRW_SURFACEFORMAT_R64_FLOAT;
-   case PIPE_FORMAT_R64G64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64_FLOAT;
-   case PIPE_FORMAT_R64G64B64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
-   case PIPE_FORMAT_R64G64B64A64_FLOAT:
-      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
-
-   case PIPE_FORMAT_R32_FLOAT:
-      return BRW_SURFACEFORMAT_R32_FLOAT;
-   case PIPE_FORMAT_R32G32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32_FLOAT;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
-
-   case PIPE_FORMAT_R32_UNORM:
-      return BRW_SURFACEFORMAT_R32_UNORM;
-   case PIPE_FORMAT_R32G32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32_UNORM;
-   case PIPE_FORMAT_R32G32B32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
-   case PIPE_FORMAT_R32G32B32A32_UNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
-
-   case PIPE_FORMAT_R32_USCALED:
-      return BRW_SURFACEFORMAT_R32_USCALED;
-   case PIPE_FORMAT_R32G32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32_USCALED;
-   case PIPE_FORMAT_R32G32B32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
-   case PIPE_FORMAT_R32G32B32A32_USCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
-
-   case PIPE_FORMAT_R32_SNORM:
-      return BRW_SURFACEFORMAT_R32_SNORM;
-   case PIPE_FORMAT_R32G32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32_SNORM;
-   case PIPE_FORMAT_R32G32B32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
-   case PIPE_FORMAT_R32G32B32A32_SNORM:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
-
-   case PIPE_FORMAT_R32_SSCALED:
-      return BRW_SURFACEFORMAT_R32_SSCALED;
-   case PIPE_FORMAT_R32G32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32_SSCALED;
-   case PIPE_FORMAT_R32G32B32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
-   case PIPE_FORMAT_R32G32B32A32_SSCALED:
-      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
-
-   case PIPE_FORMAT_R16_UNORM:
-      return BRW_SURFACEFORMAT_R16_UNORM;
-   case PIPE_FORMAT_R16G16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16_UNORM;
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
-   case PIPE_FORMAT_R16G16B16A16_UNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
-
-   case PIPE_FORMAT_R16_USCALED:
-      return BRW_SURFACEFORMAT_R16_USCALED;
-   case PIPE_FORMAT_R16G16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16_USCALED;
-   case PIPE_FORMAT_R16G16B16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
-   case PIPE_FORMAT_R16G16B16A16_USCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
-
-   case PIPE_FORMAT_R16_SNORM:
-      return BRW_SURFACEFORMAT_R16_SNORM;
-   case PIPE_FORMAT_R16G16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16_SNORM;
-   case PIPE_FORMAT_R16G16B16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
-   case PIPE_FORMAT_R16G16B16A16_SNORM:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
-
-   case PIPE_FORMAT_R16_SSCALED:
-      return BRW_SURFACEFORMAT_R16_SSCALED;
-   case PIPE_FORMAT_R16G16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16_SSCALED;
-   case PIPE_FORMAT_R16G16B16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
-   case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
-
-   case PIPE_FORMAT_R8_UNORM:
-      return BRW_SURFACEFORMAT_R8_UNORM;
-   case PIPE_FORMAT_R8G8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8_UNORM;
-   case PIPE_FORMAT_R8G8B8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
-   case PIPE_FORMAT_R8_USCALED:
-      return BRW_SURFACEFORMAT_R8_USCALED;
-   case PIPE_FORMAT_R8G8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8_USCALED;
-   case PIPE_FORMAT_R8G8B8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
-   case PIPE_FORMAT_R8G8B8A8_USCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
-
-   case PIPE_FORMAT_R8_SNORM:
-      return BRW_SURFACEFORMAT_R8_SNORM;
-   case PIPE_FORMAT_R8G8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8_SNORM;
-   case PIPE_FORMAT_R8G8B8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
-   case PIPE_FORMAT_R8G8B8A8_SNORM:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
-   case PIPE_FORMAT_R8_SSCALED:
-      return BRW_SURFACEFORMAT_R8_SSCALED;
-   case PIPE_FORMAT_R8G8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8_SSCALED;
-   case PIPE_FORMAT_R8G8B8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
-   case PIPE_FORMAT_R8G8B8A8_SSCALED:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
-
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
 static unsigned get_index_type(int type)
 {
    switch (type) {
@@ -316,77 +180,16 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
 
 
-
 static int brw_emit_vertex_elements(struct brw_context *brw)
 {
-   GLuint nr = brw->curr.num_vertex_elements;
-   GLuint i;
+   const struct brw_vertex_element_packet *brw_velems = brw->curr.velems;
+   unsigned size = brw_velems->header.length + 2;
 
+   /* why is this here */
    brw_emit_query_begin(brw);
 
-   /* If the VS doesn't read any inputs (calculating vertex position from
-    * a state variable for some reason, for example), emit a single pad
-    * VERTEX_ELEMENT struct and bail.
-    *
-    * The stale VB state stays in place, but they don't do anything unless
-    * a VE loads from them.
-    */
-   if (nr == 0) {
-      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
-      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
-      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
-		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
-      ADVANCE_BATCH();
-      return 0;
-   }
-
-   /* Now emit vertex element (VEP) state packets.
-    *
-    */
-   BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
-   for (i = 0; i < nr; i++) {
-      const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
-      unsigned nr_components = util_format_get_nr_components(input->src_format);
-
-      uint32_t format = brw_translate_surface_format( input->src_format );
-      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
-      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
-
-      switch (nr_components) {
-      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
-      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
-      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
-      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
-	 break;
-      }
-
-      OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(format << BRW_VE0_FORMAT_SHIFT) |
-		(input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+   brw_batchbuffer_data(brw->batch, brw_velems, size * 4, IGNORE_CLIPRECTS);
 
-      if (BRW_IS_IGDNG(brw))
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
-      else
-          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
-                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
-                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
-                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
-   }
-   ADVANCE_BATCH();
    return 0;
 }
 
@@ -399,10 +202,11 @@ static int brw_emit_vertices( struct brw_context *brw )
    if (ret)
       return ret;
 
+   /* XXX should separate this? */
    ret = brw_emit_vertex_elements( brw );
    if (ret)
       return ret;
-   
+
    return 0;
 }
 
@@ -410,7 +214,8 @@ static int brw_emit_vertices( struct brw_context *brw )
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
       .mesa = (PIPE_NEW_INDEX_RANGE |
-               PIPE_NEW_VERTEX_BUFFER),
+               PIPE_NEW_VERTEX_BUFFER |
+               PIPE_NEW_VERTEX_ELEMENT),
       .brw = BRW_NEW_BATCH,
       .cache = 0,
    },
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index e3c48e3149..3d84fb86fb 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,22 +1,251 @@
 #include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
 
+#include "util/u_memory.h"
+#include "util/u_format.h"
 
-static void brw_set_vertex_elements( struct pipe_context *pipe,
-				     unsigned count,
-				     const struct pipe_vertex_element *elements )
+
+static unsigned brw_translate_surface_format( unsigned id )
+{
+   switch (id) { /* id is a PIPE_FORMAT_* value; each case returns the same-named BRW_SURFACEFORMAT_* */
+   case PIPE_FORMAT_R64_FLOAT:
+      return BRW_SURFACEFORMAT_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      return BRW_SURFACEFORMAT_R32_FLOAT;
+   case PIPE_FORMAT_R32G32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32_FLOAT;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   case PIPE_FORMAT_R32_UNORM:
+      return BRW_SURFACEFORMAT_R32_UNORM;
+   case PIPE_FORMAT_R32G32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32_UNORM;
+   case PIPE_FORMAT_R32G32B32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+   case PIPE_FORMAT_R32G32B32A32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+   case PIPE_FORMAT_R32_USCALED:
+      return BRW_SURFACEFORMAT_R32_USCALED;
+   case PIPE_FORMAT_R32G32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32_USCALED;
+   case PIPE_FORMAT_R32G32B32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+   case PIPE_FORMAT_R32G32B32A32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+   case PIPE_FORMAT_R32_SNORM:
+      return BRW_SURFACEFORMAT_R32_SNORM;
+   case PIPE_FORMAT_R32G32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32_SNORM;
+   case PIPE_FORMAT_R32G32B32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+   case PIPE_FORMAT_R32G32B32A32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+   case PIPE_FORMAT_R32_SSCALED:
+      return BRW_SURFACEFORMAT_R32_SSCALED;
+   case PIPE_FORMAT_R32G32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32_SSCALED;
+   case PIPE_FORMAT_R32G32B32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+   case PIPE_FORMAT_R32G32B32A32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+   case PIPE_FORMAT_R16_UNORM:
+      return BRW_SURFACEFORMAT_R16_UNORM;
+   case PIPE_FORMAT_R16G16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+   case PIPE_FORMAT_R16_USCALED:
+      return BRW_SURFACEFORMAT_R16_USCALED;
+   case PIPE_FORMAT_R16G16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16_USCALED;
+   case PIPE_FORMAT_R16G16B16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+   case PIPE_FORMAT_R16G16B16A16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+   case PIPE_FORMAT_R16_SNORM:
+      return BRW_SURFACEFORMAT_R16_SNORM;
+   case PIPE_FORMAT_R16G16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16G16B16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+   case PIPE_FORMAT_R16_SSCALED:
+      return BRW_SURFACEFORMAT_R16_SSCALED;
+   case PIPE_FORMAT_R16G16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16_SSCALED;
+   case PIPE_FORMAT_R16G16B16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+   case PIPE_FORMAT_R8_UNORM:
+      return BRW_SURFACEFORMAT_R8_UNORM;
+   case PIPE_FORMAT_R8G8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_R8_USCALED:
+      return BRW_SURFACEFORMAT_R8_USCALED;
+   case PIPE_FORMAT_R8G8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8_USCALED;
+   case PIPE_FORMAT_R8G8B8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+   case PIPE_FORMAT_R8_SNORM:
+      return BRW_SURFACEFORMAT_R8_SNORM;
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   case PIPE_FORMAT_R8_SSCALED:
+      return BRW_SURFACEFORMAT_R8_SSCALED;
+   case PIPE_FORMAT_R8G8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8_SSCALED;
+   case PIPE_FORMAT_R8G8B8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+   case PIPE_FORMAT_R8G8B8A8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+   default: /* format not listed above: asserts, and yields 0 when the assert is compiled out */
+      assert(0);
+      return 0;
+   }
+}
+
+static void brw_translate_vertex_elements(struct brw_context *brw,
+                                          struct brw_vertex_element_packet *brw_velems,
+                                          const struct pipe_vertex_element *attribs,
+                                          unsigned count)
+{
+   unsigned i;
+
+   brw_velems->header.opcode = CMD_VERTEX_ELEMENT;
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), fill in a single pad
+    * VERTEX_ELEMENT struct and bail.
+    *
+    * The stale VB state stays in place, but it doesn't do anything unless
+    * a VE loads from it.
+    */
+   if (count == 0) {
+      brw_velems->header.length = 1;
+      brw_velems->ve[0].ve0.src_offset = 0;
+      brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+      brw_velems->ve[0].ve0.valid = 1;
+      brw_velems->ve[0].ve0.vertex_buffer_index = 0;
+      brw_velems->ve[0].ve1.dst_offset = 0;
+      brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0;
+      brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+      return;
+   }
+
+   /* Otherwise translate one VERTEX_ELEMENT (ve0 + ve1) per gallium attribute.
+    * header.length is the packet size in dwords minus 2: one header dword
+    * plus two dwords per element.
+    */
+   brw_velems->header.length = (1 + count * 2) - 2;
+   for (i = 0; i < count; i++) {
+      const struct pipe_vertex_element *input = &attribs[i];
+      unsigned nr_components = util_format_get_nr_components(input->src_format);
+
+      uint32_t format = brw_translate_surface_format( input->src_format );
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+      /* Components the format lacks: first three get STORE_0, the fourth STORE_1_FLT. */
+      switch (nr_components) {
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+         break;
+      }
+
+      brw_velems->ve[i].ve0.src_offset = input->src_offset;
+      brw_velems->ve[i].ve0.src_format = format;
+      brw_velems->ve[i].ve0.valid = 1;
+      brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index;
+      brw_velems->ve[i].ve1.vfcomponent0 = comp0;
+      brw_velems->ve[i].ve1.vfcomponent1 = comp1;
+      brw_velems->ve[i].ve1.vfcomponent2 = comp2;
+      brw_velems->ve[i].ve1.vfcomponent3 = comp3;
+
+      if (BRW_IS_IGDNG(brw)) /* NOTE(review): presumably dst_offset is unused on IGDNG - confirm */
+         brw_velems->ve[i].ve1.dst_offset = 0;
+      else
+         brw_velems->ve[i].ve1.dst_offset = i * 4;
+   }
+}
+
+static void* brw_create_vertex_elements_state( struct pipe_context *pipe,
+                                               unsigned count,
+                                               const struct pipe_vertex_element *attribs )
 {
+   /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need
+      to store the original data */
    struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *velems;
+   assert(count <= BRW_VEP_MAX);
+   velems = CALLOC_STRUCT(brw_vertex_element_packet); /* zeroed: the packet is copied verbatim into the batch, so unwritten bits must not be heap garbage */
+   if (velems) {
+      brw_translate_vertex_elements(brw, velems, attribs, count);
+   }
+   return velems;
+}
 
-   memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
-   brw->curr.num_vertex_elements = count;
+static void brw_bind_vertex_elements_state(struct pipe_context *pipe,
+                                           void *velems)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_element_packet *brw_velems = (struct brw_vertex_element_packet *) velems;
+   /* Keep the pre-translated packet; brw_emit_vertex_elements copies it into the batch. */
+   brw->curr.velems = brw_velems;
 
    brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
 }
 
+static void brw_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems ); /* releases the packet allocated by brw_create_vertex_elements_state */
+}
+
 
 static void brw_set_vertex_buffers(struct pipe_context *pipe,
-				   unsigned count,
-				   const struct pipe_vertex_buffer *buffers)
+                                   unsigned count,
+                                   const struct pipe_vertex_buffer *buffers)
 {
    struct brw_context *brw = brw_context(pipe);
    unsigned i;
@@ -49,7 +278,9 @@ void
 brw_pipe_vertex_init( struct brw_context *brw )
 {
    brw->base.set_vertex_buffers = brw_set_vertex_buffers;
-   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
+   brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
+   brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index bf10bc04de..e97ddeb5e1 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -28,7 +28,7 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-        
+
 
 #ifndef BRW_STRUCTS_H
 #define BRW_STRUCTS_H
@@ -1149,7 +1149,7 @@ struct brw_vertex_element_state
       GLuint valid:1; 
       GLuint vertex_buffer_index:5; 
    } ve0;
-   
+
    struct
    {
       GLuint dst_offset:8; 
-- 
cgit v1.2.3


From 2918edb46acf58b467e4351d627912f4c8cdb32c Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:47:54 +0100
Subject: failover: adapt to new vertex element cso

---
 src/gallium/drivers/failover/fo_context.h    |  3 +-
 src/gallium/drivers/failover/fo_state.c      | 65 ++++++++++++++++++++--------
 src/gallium/drivers/failover/fo_state_emit.c | 10 ++---
 3 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index bb1a168ea7..4a754465bb 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -78,6 +78,7 @@ struct failover_context {
    const struct fo_state     *rasterizer;
    const struct fo_state     *fragment_shader;
    const struct fo_state     *vertex_shader;
+   const struct fo_state     *vertex_elements;
 
    struct pipe_blend_color blend_color;
    struct pipe_stencil_ref stencil_ref;
@@ -89,10 +90,8 @@ struct failover_context {
    struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
-   struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
 
    uint num_vertex_buffers;
-   uint num_vertex_elements;
 
    void *sw_sampler_state[PIPE_MAX_SAMPLERS];
    void *hw_sampler_state[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index 970606a3f5..0247fb803b 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -255,9 +255,52 @@ failover_delete_vs_state(struct pipe_context *pipe,
    free(state);
 }
 
+
+
+static void *
+failover_create_vertex_elements_state( struct pipe_context *pipe,
+                                       unsigned count,
+                                       const struct pipe_vertex_element *velems )
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = malloc(sizeof *state); /* wrapper holding the sw and hw CSOs */
+   if (!state) /* out of memory: report failure instead of writing through NULL */
+      return NULL;
+   state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems);
+   state->hw_state = failover->hw->create_vertex_elements_state(failover->hw, count, velems);
+   return state;
+}
+
+static void
+failover_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems )
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = (struct fo_state*)velems;
+   /* velems is the failover wrapper; each sub-pipe must be given its own CSO, not the wrapper. */
+   failover->vertex_elements = state;
+   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
+   failover->sw->bind_vertex_elements_state( failover->sw, state ? state->sw_state : NULL );
+   failover->hw->bind_vertex_elements_state( failover->hw, state ? state->hw_state : NULL );
+}
+
+static void
+failover_delete_vertex_elements_state( struct pipe_context *pipe,
+                                       void *velems )
+{
+   struct fo_state *state = (struct fo_state*)velems;
+   struct failover_context *failover = failover_context(pipe);
+   /* Destroy the per-pipe CSOs first, then the wrapper that held them. */
+   failover->sw->delete_vertex_elements_state(failover->sw, state->sw_state);
+   failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state);
+   state->sw_state = 0;
+   state->hw_state = 0;
+   free(state);
+}
+
 static void 
 failover_set_polygon_stipple( struct pipe_context *pipe,
-			      const struct pipe_poly_stipple *stipple )
+                              const struct pipe_poly_stipple *stipple )
 {
    struct failover_context *failover = failover_context(pipe);
 
@@ -490,22 +533,6 @@ failover_set_vertex_buffers(struct pipe_context *pipe,
 }
 
 
-static void
-failover_set_vertex_elements(struct pipe_context *pipe,
-                             unsigned count,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct failover_context *failover = failover_context(pipe);
-
-   memcpy(failover->vertex_elements, vertex_elements,
-          count * sizeof(vertex_elements[0]));
-
-   failover->dirty |= FO_NEW_VERTEX_ELEMENT;
-   failover->num_vertex_elements = count;
-   failover->sw->set_vertex_elements( failover->sw, count, vertex_elements );
-   failover->hw->set_vertex_elements( failover->hw, count, vertex_elements );
-}
-
 void
 failover_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
@@ -543,6 +570,9 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.create_vs_state = failover_create_vs_state;
    failover->pipe.bind_vs_state   = failover_bind_vs_state;
    failover->pipe.delete_vs_state = failover_delete_vs_state;
+   failover->pipe.create_vertex_elements_state = failover_create_vertex_elements_state;
+   failover->pipe.bind_vertex_elements_state = failover_bind_vertex_elements_state;
+   failover->pipe.delete_vertex_elements_state = failover_delete_vertex_elements_state;
 
    failover->pipe.set_blend_color = failover_set_blend_color;
    failover->pipe.set_stencil_ref = failover_set_stencil_ref;
@@ -554,6 +584,5 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
    failover->pipe.set_viewport_state = failover_set_viewport_state;
    failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
-   failover->pipe.set_vertex_elements = failover_set_vertex_elements;
    failover->pipe.set_constant_buffer = failover_set_constant_buffer;
 }
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
index 5c00080842..09ca194497 100644
--- a/src/gallium/drivers/failover/fo_state_emit.c
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -81,6 +81,10 @@ failover_state_emit( struct failover_context *failover )
       failover->sw->bind_vs_state( failover->sw,
                                    failover->vertex_shader->sw_state );
 
+   if (failover->dirty & FO_NEW_VERTEX_ELEMENT)
+      failover->sw->bind_vertex_elements_state( failover->sw,
+                                                failover->vertex_elements->sw_state );
+
    if (failover->dirty & FO_NEW_STIPPLE)
       failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple );
 
@@ -116,11 +120,5 @@ failover_state_emit( struct failover_context *failover )
                                         failover->vertex_buffers );
    }
 
-   if (failover->dirty & FO_NEW_VERTEX_ELEMENT) {
-      failover->sw->set_vertex_elements( failover->sw,
-                                         failover->num_vertex_elements,
-                                         failover->vertex_elements );
-   }
-
    failover->dirty = 0;
 }
-- 
cgit v1.2.3


From 881be308360ec8019f17bc970bece23e9395134f Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 18:48:05 +0100
Subject: identity: adapt to new vertex element cso

---
 src/gallium/drivers/identity/id_context.c | 54 ++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 8248b2a413..05a9dfab77 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -377,6 +377,42 @@ identity_delete_vs_state(struct pipe_context *_pipe,
                          vs);
 }
 
+
+static void *
+identity_create_vertex_elements_state(struct pipe_context *_pipe,
+                                      unsigned num_elements,
+                                      const struct pipe_vertex_element *vertex_elements)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+   /* Return the wrapped pipe's CSO: it is the handle bind/delete forward unchanged. */
+   return pipe->create_vertex_elements_state(pipe,
+                                             num_elements,
+                                             vertex_elements);
+}
+
+static void
+identity_bind_vertex_elements_state(struct pipe_context *_pipe,
+                                    void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+   /* Pure pass-through: velems is forwarded unchanged to the wrapped pipe. */
+   pipe->bind_vertex_elements_state(pipe,
+                                    velems);
+}
+
+static void
+identity_delete_vertex_elements_state(struct pipe_context *_pipe,
+                                      void *velems)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+   /* Pure pass-through: the wrapped pipe destroys the CSO it was handed. */
+   pipe->delete_vertex_elements_state(pipe,
+                                      velems);
+}
+
 static void
 identity_set_blend_color(struct pipe_context *_pipe,
                          const struct pipe_blend_color *blend_color)
@@ -563,20 +599,6 @@ identity_set_vertex_buffers(struct pipe_context *_pipe,
                             num_buffers,
                             buffers);
 }
-
-static void
-identity_set_vertex_elements(struct pipe_context *_pipe,
-                             unsigned num_elements,
-                             const struct pipe_vertex_element *vertex_elements)
-{
-   struct identity_context *id_pipe = identity_context(_pipe);
-   struct pipe_context *pipe = id_pipe->pipe;
-
-   pipe->set_vertex_elements(pipe,
-                             num_elements,
-                             vertex_elements);
-}
-
 static void
 identity_surface_copy(struct pipe_context *_pipe,
                       struct pipe_surface *_dst,
@@ -733,6 +755,9 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.create_vs_state = identity_create_vs_state;
    id_pipe->base.bind_vs_state = identity_bind_vs_state;
    id_pipe->base.delete_vs_state = identity_delete_vs_state;
+   id_pipe->base.create_vertex_elements_state = identity_create_vertex_elements_state;
+   id_pipe->base.bind_vertex_elements_state = identity_bind_vertex_elements_state;
+   id_pipe->base.delete_vertex_elements_state = identity_delete_vertex_elements_state;
    id_pipe->base.set_blend_color = identity_set_blend_color;
    id_pipe->base.set_stencil_ref = identity_set_stencil_ref;
    id_pipe->base.set_clip_state = identity_set_clip_state;
@@ -744,7 +769,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
    id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
-   id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
    id_pipe->base.surface_copy = identity_surface_copy;
    id_pipe->base.surface_fill = identity_surface_fill;
    id_pipe->base.clear = identity_clear;
-- 
cgit v1.2.3


From 5bf4ff7dabfffd0018382d440682bd5f7d418606 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 20:11:36 +0100
Subject: nv30: adapt to vertex element cso

---
 src/gallium/drivers/nv30/nv30_context.h |  8 ++++++--
 src/gallium/drivers/nv30/nv30_state.c   | 34 +++++++++++++++++++++++++++------
 src/gallium/drivers/nv30/nv30_vbo.c     |  8 ++++----
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index ca3d6aca7f..e48f531b94 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -108,6 +108,11 @@ struct nv30_state {
 	struct nouveau_stateobj *hw[NV30_STATE_MAX];
 };
 
+struct nv30_vtxelt_state {
+	struct pipe_vertex_element pipe[16]; /* elements copied in by nv30_vtxelts_state_create */
+	unsigned num_elements; /* number of pipe[] entries copied in */
+};
+
 struct nv30_context {
 	struct pipe_context pipe;
 
@@ -143,8 +148,7 @@ struct nv30_context {
 	unsigned dirty_samplers;
 	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
 	unsigned vtxbuf_nr;
-	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-	unsigned vtxelt_nr;
+	struct nv30_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv30_context *
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index d911c80707..24b15a63ac 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -669,15 +669,34 @@ nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
 	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
 
+static void *
+nv30_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state);
+	assert(num_elements <= 16); /* cso->pipe[] holds 16; not doing fallbacks yet */
+	if (!cso)
+		return NULL; /* allocation failed: don't write through NULL */
+	cso->num_elements = num_elements;
+	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+/*	nv30_vtxelt_construct(cso);*/
+
+	return (void *)cso;
+}
+
 static void
-nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-			 const struct pipe_vertex_element *ve)
+nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-	struct nv30_context *nv30 = nv30_context(pipe);
+	FREE(hwcso);
+}
 
-	memcpy(nv30->vtxelt, ve, sizeof(*ve) * count);
-	nv30->vtxelt_nr = count;
+static void
+nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv30_context *nv30 = nv30_context(pipe);
 
+	nv30->vtxelt = hwcso;
 	nv30->dirty |= NV30_NEW_ARRAYS;
 	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
@@ -722,7 +741,10 @@ nv30_init_state_functions(struct nv30_context *nv30)
 	nv30->pipe.set_scissor_state = nv30_set_scissor_state;
 	nv30->pipe.set_viewport_state = nv30_set_viewport_state;
 
+	nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create;
+	nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete;
+	nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind;
+
 	nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
-	nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
 }
 
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index e48823a913..f3856bb5a5 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -492,16 +492,16 @@ nv30_vbo_validate(struct nv30_context *nv30)
 	int hw;
 
 	vtxbuf = so_new(3, 17, 18);
-	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
+	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements);
 	vtxfmt = so_new(1, 16, 0);
-	so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
+	so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements);
 
-	for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
+	for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) {
 		struct pipe_vertex_element *ve;
 		struct pipe_vertex_buffer *vb;
 		unsigned type, ncomp;
 
-		ve = &nv30->vtxelt[hw];
+		ve = &nv30->vtxelt->pipe[hw];
 		vb = &nv30->vtxbuf[ve->vertex_buffer_index];
 
 		if (!vb->stride) {
-- 
cgit v1.2.3


From ddccf7797425097ee6562290d3476075c38220b0 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 1 Mar 2010 20:11:47 +0100
Subject: nv40: adapt to vertex element cso

---
 src/gallium/drivers/nv40/nv40_context.h    |  9 ++++++--
 src/gallium/drivers/nv40/nv40_state.c      | 34 ++++++++++++++++++++++++------
 src/gallium/drivers/nv40/nv40_state_emit.c |  2 +-
 src/gallium/drivers/nv40/nv40_vbo.c        |  8 +++----
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 4861924dac..f96f2b89bf 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -108,6 +108,12 @@ struct nv40_state {
 	struct nouveau_stateobj *hw[NV40_STATE_MAX];
 };
 
+
+struct nv40_vtxelt_state {
+	struct pipe_vertex_element pipe[16]; /* elements copied in by nv40_vtxelts_state_create */
+	unsigned num_elements; /* number of pipe[] entries copied in */
+};
+
 struct nv40_context {
 	struct pipe_context pipe;
 
@@ -158,8 +164,7 @@ struct nv40_context {
 	unsigned dirty_samplers;
 	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
 	unsigned vtxbuf_nr;
-	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
-	unsigned vtxelt_nr;
+	struct nv40_vtxelt_state *vtxelt;
 };
 
 static INLINE struct nv40_context *
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index 2073bf0735..449e246b8c 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -684,15 +684,34 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
 	nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
 
+static void *
+nv40_vtxelts_state_create(struct pipe_context *pipe,
+			  unsigned num_elements,
+			  const struct pipe_vertex_element *elements)
+{
+	struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state);
+	assert(num_elements <= 16); /* cso->pipe[] holds 16; not doing fallbacks yet */
+	if (!cso)
+		return NULL; /* allocation failed: don't write through NULL */
+	cso->num_elements = num_elements;
+	memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+/*	nv40_vtxelt_construct(cso);*/
+
+	return (void *)cso;
+}
+
 static void
-nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
-			 const struct pipe_vertex_element *ve)
+nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-	struct nv40_context *nv40 = nv40_context(pipe);
+	FREE(hwcso);
+}
 
-	memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
-	nv40->vtxelt_nr = count;
+static void
+nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
 
+	nv40->vtxelt = hwcso;
 	nv40->dirty |= NV40_NEW_ARRAYS;
 	nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
@@ -737,7 +756,10 @@ nv40_init_state_functions(struct nv40_context *nv40)
 	nv40->pipe.set_scissor_state = nv40_set_scissor_state;
 	nv40->pipe.set_viewport_state = nv40_set_viewport_state;
 
+	nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create;
+	nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete;
+	nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind;
+
 	nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
-	nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 8990f303ce..297d71f4fa 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -174,7 +174,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
 
 	if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
 		draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
-		draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);	
+		draw_set_vertex_elements(draw, nv40->vtxelt->num_elements, nv40->vtxelt->pipe);	
 	}
 
 	nv40_state_do_validate(nv40, swtnl_states);
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index 7812460d2e..fabdf4bf23 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -493,16 +493,16 @@ nv40_vbo_validate(struct nv40_context *nv40)
 	int hw;
 
 	vtxbuf = so_new(3, 17, 18);
-	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements);
 	vtxfmt = so_new(1, 16, 0);
-	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements);
 
-	for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+	for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) {
 		struct pipe_vertex_element *ve;
 		struct pipe_vertex_buffer *vb;
 		unsigned type, ncomp;
 
-		ve = &nv40->vtxelt[hw];
+		ve = &nv40->vtxelt->pipe[hw];
 		vb = &nv40->vtxbuf[ve->vertex_buffer_index];
 
 		if (!vb->stride) {
-- 
cgit v1.2.3


From 4240987cecdaaaeb2d6188f7c83ff4cb8e670c59 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Tue, 2 Mar 2010 18:40:03 -0800
Subject: r300g: Make velem CSO actually work.

glxgears runs again.
---
 src/gallium/drivers/r300/r300_blit.c    |  2 +-
 src/gallium/drivers/r300/r300_context.h |  2 +-
 src/gallium/drivers/r300/r300_state.c   | 15 ++++++++++-----
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 2f9650df1b..297768e5cf 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -34,7 +34,7 @@ static void r300_blitter_save_states(struct r300_context* r300)
     util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
     util_blitter_save_fragment_shader(r300->blitter, r300->fs);
     util_blitter_save_vertex_shader(r300->blitter, r300->vs);
-    util_blitter_save_vertex_elements(r300->blitter, r300->vs);
+    util_blitter_save_vertex_elements(r300->blitter, r300->velems);
 }
 
 /* Clear currently bound buffers. */
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 0ee0ab47a6..f372743c5b 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -249,7 +249,7 @@ struct r300_vertex_info {
 
 struct r300_velems_state {
     unsigned count;
-    struct pipe_vertex_element velem[];
+    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 };
 
 extern struct pipe_viewport_state r300_viewport_identity;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index ceac690fc4..995664d900 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1061,13 +1061,14 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
                                                unsigned count,
                                                const struct pipe_vertex_element* attribs)
 {
-   /*XXX could precalculate state here instead of later */
     struct r300_velems_state *velems;
+
+    /*XXX should precalculate state here instead of later */
     assert(count <= PIPE_MAX_ATTRIBS);
-    velems = (struct r300_velems_state *) MALLOC(sizeof(struct r300_velems_state) + count * sizeof(*attribs));
+    velems = CALLOC_STRUCT(r300_velems_state);
     if (velems) {
-       velems->count = count;
-       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+        velems->count = count;
+        memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
     }
     return velems;
 }
@@ -1078,6 +1079,10 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
     struct r300_context *r300 = r300_context(pipe);
     struct r300_velems_state *r300_velems = (struct r300_velems_state *) velems;
 
+    if (velems == NULL) {
+        return;
+    }
+
     r300->velems = r300_velems;
 
     if (r300->draw) {
@@ -1094,7 +1099,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
 
 static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
 {
-   FREE( velems );
+   FREE(velems);
 }
 
 static void* r300_create_vs_state(struct pipe_context* pipe,
-- 
cgit v1.2.3


From e497a58fad878d913a1795b239ddd05870988c58 Mon Sep 17 00:00:00 2001
From: David Heidelberger <d.okias@gmail.com>
Date: Tue, 9 Mar 2010 13:50:27 +0100
Subject: nv30: fix typo

Signed-off-by: David Heidelberger <d.okias@gmail.com>
---
 src/gallium/drivers/nv30/nv30_miptree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 5ef74a832d..bfa27b632f 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -236,5 +236,5 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen)
 	pscreen->get_tex_surface = nv30_miptree_surface_new;
 	pscreen->tex_surface_destroy = nv30_miptree_surface_del;
 
-	nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket;
+	nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket;
 }
-- 
cgit v1.2.3


From fe9f8536f1b1e7a3a2ac10afd8078e8f4d327578 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 14:19:29 +0100
Subject: auxiliary: fix vertex elements cso

A lookup could potentially have matched even though the cso was different
(when the counts differed but the first few elements were the same).
---
 src/gallium/auxiliary/cso_cache/cso_cache.h   |  7 ++++++-
 src/gallium/auxiliary/cso_cache/cso_context.c | 19 +++++++++++++------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index d884d5410f..fb09b83c62 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -146,8 +146,13 @@ struct cso_sampler {
    struct pipe_context *context;
 };
 
+struct cso_velems_state {
+   unsigned count;
+   struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+};
+
 struct cso_velements {
-   struct pipe_vertex_element state[PIPE_MAX_ATTRIBS];
+   struct cso_velems_state state;
    void *data;
    cso_state_callback delete_state;
    struct pipe_context *context;
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 95e3c18e53..510366a8d4 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1152,18 +1152,25 @@ enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
    unsigned key_size, hash_key;
    struct cso_hash_iter iter;
    void *handle;
-
-   key_size = sizeof(struct pipe_vertex_element) * count;
-   hash_key = cso_construct_key((void*)states, key_size);
-   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)states, key_size);
+   struct cso_velems_state velems_state;
+
+   /* need to include the count into the stored state data too.
+      Otherwise first few count pipe_vertex_elements could be identical even if count
+      is different, and there's no guarantee the hash would be different in that
+      case neither */
+   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
+   velems_state.count = count;
+   memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count);
+   hash_key = cso_construct_key((void*)&velems_state, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size);
 
    if (cso_hash_iter_is_null(iter)) {
       struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
       if (!cso)
          return PIPE_ERROR_OUT_OF_MEMORY;
 
-      memcpy(&cso->state, states, key_size);
-      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state[0]);
+      memcpy(&cso->state, &velems_state, key_size);
+      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]);
       cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state;
       cso->context = ctx->pipe;
 
-- 
cgit v1.2.3


From e8983f70b41ea92a9527cb618db011b5dd136626 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 14:23:00 +0100
Subject: gallium: don't use flexible array members in drivers for vertex
 elements cso

While this C99 feature should work with most compilers, valgrind doesn't
really like it, and it only saves some memory. We don't do this in
similar cases (like the blend state) either.
---
 src/gallium/drivers/cell/ppu/cell_context.h      | 2 +-
 src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +-
 src/gallium/drivers/i915/i915_context.h          | 2 +-
 src/gallium/drivers/i915/i915_state.c            | 2 +-
 src/gallium/drivers/i965/brw_pipe_vertex.c       | 2 +-
 src/gallium/drivers/llvmpipe/lp_state.h          | 2 +-
 src/gallium/drivers/llvmpipe/lp_state_vertex.c   | 2 +-
 src/gallium/drivers/softpipe/sp_state.h          | 2 +-
 src/gallium/drivers/softpipe/sp_state_vertex.c   | 2 +-
 src/gallium/drivers/svga/svga_context.h          | 2 +-
 src/gallium/drivers/svga/svga_pipe_vertex.c      | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 84ad0f3422..584f355804 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -96,7 +96,7 @@ struct cell_buffer_list
 struct cell_velems_state
 {
    unsigned count;
-   struct pipe_vertex_element velem[];
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 }
 
 /**
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index 35c919fb6b..d3efb8ecea 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -43,7 +43,7 @@ cell_create_vertex_elements_state(struct pipe_context *pipe,
 {
    struct cell_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state) + count * sizeof(*attribs));
+   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
    if (velems) {
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 369c63eece..3e383aaa1c 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -189,7 +189,7 @@ struct i915_sampler_state {
 
 struct i915_velems_state {
    unsigned count;
-   struct pipe_vertex_element velem[];
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 };
 
 struct i915_texture {
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 46406065c3..8927dfc33d 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -749,7 +749,7 @@ i915_create_vertex_elements_state(struct pipe_context *pipe,
 {
    struct i915_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state) + count * sizeof(*attribs));
+   velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state));
    if (velems) {
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 3d84fb86fb..d6a840857e 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -215,7 +215,7 @@ static void* brw_create_vertex_elements_state( struct pipe_context *pipe,
                                                const struct pipe_vertex_element *attribs )
 {
    /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need
-      store the original data */
+      to store the original data */
    struct brw_context *brw = brw_context(pipe);
    struct brw_vertex_element_packet *velems;
    assert(count <= BRW_VEP_MAX);
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 57f5bd0042..6dbdc195bf 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -121,7 +121,7 @@ struct lp_vertex_shader {
 
 struct lp_velems_state {
    unsigned count;
-   struct pipe_vertex_element velem[];
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index 5a9b6d5e18..2ddd110a5f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -42,7 +42,7 @@ llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
 {
    struct lp_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state) + count * sizeof(*attribs));
+   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
    if (velems) {
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index a6b9a841fe..6b01c0f4d7 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -102,7 +102,7 @@ struct sp_geometry_shader {
 
 struct sp_velems_state {
    unsigned count;
-   struct pipe_vertex_element velem[];
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 };
 
 
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index e7dc3d002b..a151758ddc 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -43,7 +43,7 @@ softpipe_create_vertex_elements_state(struct pipe_context *pipe,
 {
    struct sp_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state) + count * sizeof(*attribs));
+   velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state));
    if (velems) {
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 4d9f00991a..791d30edc0 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -171,7 +171,7 @@ struct svga_sampler_state {
 
 struct svga_velems_state {
    unsigned count;
-   struct pipe_vertex_element velem[];
+   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
 };
 
 /* Use to calculate differences between state emitted to hardware and
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 979deb12af..d4a1280e74 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -73,7 +73,7 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
 {
    struct svga_velems_state *velems;
    assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state) + count * sizeof(*attribs));
+   velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
    if (velems) {
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
-- 
cgit v1.2.3


From ae7b7bf1edcf6c492b4dcc162bca28a0090f601e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 15:09:01 +0100
Subject: st/mesa: fix clear/drawpixels/bitmap for new cso vertex elements
 interface

somehow those got lost...
---
 src/mesa/state_tracker/st_cb_bitmap.c     | 4 ++++
 src/mesa/state_tracker/st_cb_clear.c      | 4 ++++
 src/mesa/state_tracker/st_cb_drawpixels.c | 4 ++++
 src/mesa/state_tracker/st_context.c       | 8 ++++++++
 src/mesa/state_tracker/st_context.h       | 3 +++
 5 files changed, 23 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 25430bba77..95b148a7b4 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -440,6 +440,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_save_viewport(cso);
    cso_save_fragment_shader(cso);
    cso_save_vertex_shader(cso);
+   cso_save_vertex_elements(cso);
 
    /* rasterizer state: just scissor */
    st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
@@ -490,6 +491,8 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       cso_set_viewport(cso, &vp);
    }
 
+   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
    /* draw textured quad */
    offset = setup_bitmap_vertex_data(st, x, y, width, height, z, color);
 
@@ -506,6 +509,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_viewport(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_vertex_elements(cso);
 }
 
 
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 898c32293d..efba9853b4 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -220,6 +220,7 @@ clear_with_quad(GLcontext *ctx,
    cso_save_rasterizer(st->cso_context);
    cso_save_fragment_shader(st->cso_context);
    cso_save_vertex_shader(st->cso_context);
+   cso_save_vertex_elements(st->cso_context);
 
    /* blend state: RGBA masking */
    {
@@ -271,6 +272,8 @@ clear_with_quad(GLcontext *ctx,
       cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
    }
 
+   cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
+
    cso_set_rasterizer(st->cso_context, &st->clear.raster);
 
    cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
@@ -286,6 +289,7 @@ clear_with_quad(GLcontext *ctx,
    cso_restore_rasterizer(st->cso_context);
    cso_restore_fragment_shader(st->cso_context);
    cso_restore_vertex_shader(st->cso_context);
+   cso_restore_vertex_elements(st->cso_context);
 
 }
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 36c0a2b0e1..a21c0105e9 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -529,6 +529,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_save_sampler_textures(cso);
    cso_save_fragment_shader(cso);
    cso_save_vertex_shader(cso);
+   cso_save_vertex_elements(cso);
 
    /* rasterizer state: just scissor */
    {
@@ -581,6 +582,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       cso_set_viewport(cso, &vp);
    }
 
+   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
    /* texture state: */
    if (st->pixel_xfer.pixelmap_enabled) {
       struct pipe_texture *textures[2];
@@ -612,6 +615,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_sampler_textures(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_vertex_elements(cso);
 }
 
 
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 8f6a0c2423..0329e0cef0 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -142,6 +142,14 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe )
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       st->state.sampler_list[i] = &st->state.samplers[i];
 
+   for (i = 0; i < 3; i++) {
+      memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element));
+      st->velems_util_draw[i].src_offset = i * 4 * sizeof(float);
+      st->velems_util_draw[i].instance_divisor = 0;
+      st->velems_util_draw[i].vertex_buffer_index = 0;
+      st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   }
+
    /* we want all vertex data to be placed in buffer objects */
    vbo_use_buffer_objects(ctx);
 
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 50e98d7146..f2fa7e8711 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -173,6 +173,9 @@ struct st_context
       unsigned vbuf_slot;
    } clear;
 
+   /** used for anything using util_draw_vertex_buffer */
+   struct pipe_vertex_element velems_util_draw[3];
+
    void *passthrough_fs;  /**< simple pass-through frag shader */
 
    struct gen_mipmap_state *gen_mipmap;
-- 
cgit v1.2.3


From 22812421b5f394af741022f2f65ba19bc7b0aa73 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 16:24:47 +0100
Subject: r300g: fix gallium-vertexlementcso merge damage

---
 src/gallium/drivers/r300/r300_emit.c          |  4 ++--
 src/gallium/drivers/r300/r300_render.c        | 25 -------------------------
 src/gallium/drivers/r300/r300_state_derived.c |  1 -
 3 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 65377b3865..55e9217fd3 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1004,7 +1004,7 @@ void r300_emit_buffer_validate(struct r300_context *r300,
         (struct r300_textures_state*)r300->textures_state.state;
     struct r300_texture* tex;
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->vertex_element;
+    struct pipe_vertex_element *velem = r300->velems->velem;
     struct pipe_buffer *pbuf;
     unsigned i;
     boolean invalid = FALSE;
@@ -1062,7 +1062,7 @@ validate:
     }
     /* ...vertex buffers for HWTCL path... */
     if (do_validate_vertex_buffers) {
-        for (i = 0; i < r300->vertex_element_count; i++) {
+        for (i = 0; i < r300->velems->count; i++) {
             pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
 
             if (!r300->winsys->add_buffer(r300->winsys, pbuf,
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index d0e06ade40..2605628c02 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -319,31 +319,6 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     END_CS;
 }
 
-static boolean r300_setup_vertex_buffers(struct r300_context *r300)
-{
-    struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
-    struct pipe_vertex_element *velem = r300->velems->velem;
-    struct pipe_buffer *pbuf;
-
-validate:
-    for (int i = 0; i < r300->velems->count; i++) {
-        pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
-
-        if (!r300->winsys->add_buffer(r300->winsys, pbuf,
-                                      RADEON_GEM_DOMAIN_GTT, 0)) {
-            r300->context.flush(&r300->context, 0, NULL);
-            goto validate;
-        }
-    }
-
-    if (!r300->winsys->validate(r300->winsys)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        return r300->winsys->validate(r300->winsys);
-    }
-
-    return TRUE;
-}
-
 static void r300_shorten_ubyte_elts(struct r300_context* r300,
                                     struct pipe_buffer** elts,
                                     unsigned count)
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 4422581b51..81a373abad 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -128,7 +128,6 @@ static void r300_vertex_psc(struct r300_context* r300)
             " in psc\n",
             vs->info.num_inputs,
             r300->velems->count);
-            r300->vertex_element_count);
 
     for (i = 0; i < r300->velems->count; i++) {
         format = r300->velems->velem[i].src_format;
-- 
cgit v1.2.3


From 4d02534628e5fd4cd1231c5d742928b34067f25c Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 16:28:59 +0100
Subject: identity: fix copy & paste error

---
 src/gallium/drivers/identity/id_context.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 05a9dfab77..baf0ae4401 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -378,7 +378,7 @@ identity_delete_vs_state(struct pipe_context *_pipe,
 }
 
 
-static void
+static void *
 identity_create_vertex_elements_state(struct pipe_context *_pipe,
                                       unsigned num_elements,
                                       const struct pipe_vertex_element *vertex_elements)
@@ -386,9 +386,9 @@ identity_create_vertex_elements_state(struct pipe_context *_pipe,
    struct identity_context *id_pipe = identity_context(_pipe);
    struct pipe_context *pipe = id_pipe->pipe;
 
-   pipe->create_vertex_elements_state(pipe,
-                                      num_elements,
-                                      vertex_elements);
+   return pipe->create_vertex_elements_state(pipe,
+                                             num_elements,
+                                             vertex_elements);
 }
 
 static void
-- 
cgit v1.2.3


From 9c8cb12388be6818ac11a28d40034d01047584b2 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 8 Mar 2010 21:35:43 -0800
Subject: r300g: Cleanup old PSC code a bit.

---
 src/gallium/drivers/r300/r300_state_derived.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 81a373abad..cb2b9bd99f 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -114,13 +114,9 @@ static void r300_vertex_psc(struct r300_context* r300)
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
-    int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-    int* stream_tab;
 
     memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
 
-    stream_tab = identity;
-
     /* Vertex shaders have no semantics on their inputs,
      * so PSC should just route stuff based on the vertex elements,
      * and not on attrib information. */
@@ -133,7 +129,7 @@ static void r300_vertex_psc(struct r300_context* r300)
         format = r300->velems->velem[i].src_format;
 
         type = r300_translate_vertex_data_type(format) |
-            (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
+            (i << R300_DST_VEC_LOC_SHIFT);
         swizzle = r300_translate_vertex_data_swizzle(format);
 
         if (i & 1) {
-- 
cgit v1.2.3


From 141c7cb0d6078b3a2832c4be03ddcf25e35e596b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 8 Mar 2010 23:36:04 -0800
Subject: r300g: Precalculate and CSO-bind PSC state.

---
 src/gallium/drivers/r300/r300_context.c       |   2 -
 src/gallium/drivers/r300/r300_context.h       |   6 +-
 src/gallium/drivers/r300/r300_state.c         | 197 ++++++++++++++++++++++++--
 src/gallium/drivers/r300/r300_state_derived.c | 190 +------------------------
 4 files changed, 190 insertions(+), 205 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e0a5532327..923e1e541f 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -60,7 +60,6 @@ static void r300_destroy_context(struct pipe_context* context)
     FREE(r300->rs_block_state.state);
     FREE(r300->scissor_state.state);
     FREE(r300->textures_state.state);
-    FREE(r300->vertex_stream_state.state);
     FREE(r300->vap_output_state.state);
     FREE(r300->viewport_state.state);
     FREE(r300->ztop_state.state);
@@ -147,7 +146,6 @@ static void r300_setup_atoms(struct r300_context* r300)
     r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
     r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
-    r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
     r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state);
     r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
     r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 80e69924f0..985e339112 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -288,9 +288,11 @@ struct r300_vertex_info {
     uint32_t vap_prog_stream_cntl_ext[8];
 };
 
-struct r300_velems_state {
+struct r300_vertex_element_state {
     unsigned count;
     struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+
+    struct r300_vertex_stream_state vertex_stream;
 };
 
 extern struct pipe_viewport_state r300_viewport_identity;
@@ -365,7 +367,7 @@ struct r300_context {
     int vertex_buffer_count;
     int vertex_buffer_max_index;
     /* Vertex elements for Gallium. */
-    struct r300_velems_state *velems;
+    struct r300_vertex_element_state *velems;
 
     /* Vertex info for Draw. */
     struct vertex_info vertex_info;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 7683a423f2..bd4c2766cb 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1046,8 +1046,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
     if (r300->draw) {
         draw_flush(r300->draw);
         draw_set_vertex_buffers(r300->draw, count, buffers);
-    } else {
-        r300->vertex_stream_state.dirty = TRUE;
     }
 }
 
@@ -1067,37 +1065,209 @@ static boolean r300_validate_aos(struct r300_context *r300)
     return TRUE;
 }
 
+static void r300_draw_emit_attrib(struct r300_context* r300,
+                                  enum attrib_emit emit,
+                                  enum interp_mode interp,
+                                  int index)
+{
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct tgsi_shader_info* info = &vs->info;
+    int output;
+
+    output = draw_find_shader_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
+    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct r300_shader_semantics* vs_outputs = &vs->outputs;
+    int i, gen_count;
+
+    /* Position. */
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->pos);
+    } else {
+        assert(0);
+    }
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+                              vs_outputs->psize);
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+                                  vs_outputs->color[i]);
+        }
+    }
+
+    /* XXX Back-face colors. */
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                                  vs_outputs->generic[i]);
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->fog);
+        gen_count++;
+    }
+
+    /* XXX magic */
+    assert(gen_count <= 8);
+}
+
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i;
+
+    assert(velems->count <= 16);
+
+    /* Vertex shaders have no semantics on their inputs,
+     * so PSC should just route stuff based on the vertex elements,
+     * and not on attrib information. */
+    for (i = 0; i < velems->count; i++) {
+        format = velems->velem[i].src_format;
+
+        type = r300_translate_vertex_data_type(format) |
+            (i << R300_DST_VEC_LOC_SHIFT);
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300,
+                                  struct r300_vertex_element_state *velems)
+{
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct vertex_info* vinfo = &r300->vertex_info;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i, attrib_count;
+    int* vs_output_tab = vs->stream_loc_notcl;
+
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the vs_output_tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+    for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               vs_output_tab[i]);
+    }
+
+    for (i = 0; i < attrib_count; i++) {
+        /* Make sure we have a proper destination for our attribute. */
+        assert(vs_output_tab[i] != -1);
+
+        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+        /* Obtain the type of data in this attribute. */
+        type = r300_translate_vertex_data_type(format) |
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+        /* Obtain the swizzle for this attribute. Note that the default
+         * swizzle in the hardware is not XYZW! */
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        /* Add the attribute to the PSC table. */
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+    }
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vstream->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+}
+
 static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
                                                unsigned count,
                                                const struct pipe_vertex_element* attribs)
 {
-    struct r300_velems_state *velems;
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_screen* r300screen = r300_screen(pipe->screen);
+    struct r300_vertex_element_state *velems;
 
-    /*XXX should precalculate state here instead of later */
     assert(count <= PIPE_MAX_ATTRIBS);
-    velems = CALLOC_STRUCT(r300_velems_state);
-    if (velems) {
+    velems = CALLOC_STRUCT(r300_vertex_element_state);
+    if (velems != NULL) {
         velems->count = count;
         memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
+
+        if (r300screen->caps->has_tcl) {
+            r300_vertex_psc(velems);
+        } else {
+            memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+            r300_draw_emit_all_attribs(r300);
+            draw_compute_vertex_size(&r300->vertex_info);
+            r300_swtcl_vertex_psc(r300, velems);
+        }
     }
     return velems;
 }
 
 static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
-                                            void *velems)
+                                            void *state)
 {
     struct r300_context *r300 = r300_context(pipe);
-    struct r300_velems_state *r300_velems = (struct r300_velems_state *) velems;
+    struct r300_vertex_element_state *velems = state;
 
     if (velems == NULL) {
         return;
     }
 
-    r300->velems = r300_velems;
+    r300->velems = velems;
 
     if (r300->draw) {
         draw_flush(r300->draw);
-        draw_set_vertex_elements(r300->draw, r300_velems->count, r300_velems->velem);
+        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
     }
 
     if (!r300_validate_aos(r300)) {
@@ -1105,11 +1275,14 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
         assert(0);
         abort();
     }
+
+    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
 }
 
-static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
-   FREE(velems);
+   FREE(state);
 }
 
 static void* r300_create_vs_state(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index cb2b9bd99f..6b9f61acd7 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -37,183 +37,6 @@
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
-static void r300_draw_emit_attrib(struct r300_context* r300,
-                                  enum attrib_emit emit,
-                                  enum interp_mode interp,
-                                  int index)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct tgsi_shader_info* info = &vs->info;
-    int output;
-
-    output = draw_find_shader_output(r300->draw,
-                                     info->output_semantic_name[index],
-                                     info->output_semantic_index[index]);
-    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
-}
-
-static void r300_draw_emit_all_attribs(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_shader_semantics* vs_outputs = &vs->outputs;
-    int i, gen_count;
-
-    /* Position. */
-    if (vs_outputs->pos != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->pos);
-    } else {
-        assert(0);
-    }
-
-    /* Point size. */
-    if (vs_outputs->psize != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
-                              vs_outputs->psize);
-    }
-
-    /* Colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
-                                  vs_outputs->color[i]);
-        }
-    }
-
-    /* XXX Back-face colors. */
-
-    /* Texture coordinates. */
-    gen_count = 0;
-    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        if (vs_outputs->generic[i] != ATTR_UNUSED) {
-            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                                  vs_outputs->generic[i]);
-            gen_count++;
-        }
-    }
-
-    /* Fog coordinates. */
-    if (vs_outputs->fog != ATTR_UNUSED) {
-        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
-                              vs_outputs->fog);
-        gen_count++;
-    }
-
-    /* XXX magic */
-    assert(gen_count <= 8);
-}
-
-/* Update the PSC tables. */
-/* XXX move this function into r300_state.c after TCL-bypass gets removed
- * XXX because this one is dependent only on vertex elements. */
-static void r300_vertex_psc(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_vertex_stream_state *vformat =
-        (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
-    uint16_t type, swizzle;
-    enum pipe_format format;
-    unsigned i;
-
-    memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
-    /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on the vertex elements,
-     * and not on attrib information. */
-    DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
-            " in psc\n",
-            vs->info.num_inputs,
-            r300->velems->count);
-
-    for (i = 0; i < r300->velems->count; i++) {
-        format = r300->velems->velem[i].src_format;
-
-        type = r300_translate_vertex_data_type(format) |
-            (i << R300_DST_VEC_LOC_SHIFT);
-        swizzle = r300_translate_vertex_data_swizzle(format);
-
-        if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
-        } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
-        }
-    }
-
-    assert(i <= 15);
-
-    /* Set the last vector in the PSC. */
-    if (i) {
-        i -= 1;
-    }
-    vformat->vap_prog_stream_cntl[i >> 1] |=
-        (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
-    vformat->count = (i >> 1) + 1;
-    r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
-/* Update the PSC tables for SW TCL, using Draw. */
-static void r300_swtcl_vertex_psc(struct r300_context* r300)
-{
-    struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_vertex_stream_state *vformat =
-        (struct r300_vertex_stream_state*)r300->vertex_stream_state.state;
-    struct vertex_info* vinfo = &r300->vertex_info;
-    uint16_t type, swizzle;
-    enum pipe_format format;
-    unsigned i, attrib_count;
-    int* vs_output_tab = vs->stream_loc_notcl;
-
-    memset(vformat, 0, sizeof(struct r300_vertex_stream_state));
-
-    /* For each Draw attribute, route it to the fragment shader according
-     * to the vs_output_tab. */
-    attrib_count = vinfo->num_attribs;
-    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
-    for (i = 0; i < attrib_count; i++) {
-        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
-               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-               vs_output_tab[i]);
-    }
-
-    for (i = 0; i < attrib_count; i++) {
-        /* Make sure we have a proper destination for our attribute. */
-        assert(vs_output_tab[i] != -1);
-
-        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
-
-        /* Obtain the type of data in this attribute. */
-        type = r300_translate_vertex_data_type(format) |
-            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
-
-        /* Obtain the swizzle for this attribute. Note that the default
-         * swizzle in the hardware is not XYZW! */
-        swizzle = r300_translate_vertex_data_swizzle(format);
-
-        /* Add the attribute to the PSC table. */
-        if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
-        } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type;
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
-        }
-    }
-
-    /* Set the last vector in the PSC. */
-    if (i) {
-        i -= 1;
-    }
-    vformat->vap_prog_stream_cntl[i >> 1] |=
-        (R300_LAST_VEC << (i & 1 ? 16 : 0));
-
-    vformat->count = (i >> 1) + 1;
-    r300->vertex_stream_state.size = (1 + vformat->count) * 2;
-}
-
 static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
                         boolean swizzle_0001)
 {
@@ -428,18 +251,8 @@ static void r300_update_rs_block(struct r300_context* r300,
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_vertex_shader* vs = r300->vs_state.state;
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
 
     r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs);
-
-    if (r300screen->caps->has_tcl) {
-        r300_vertex_psc(r300);
-    } else {
-        memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
-        r300_draw_emit_all_attribs(r300);
-        draw_compute_vertex_size(&r300->vertex_info);
-        r300_swtcl_vertex_psc(r300);
-    }
 }
 
 static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
@@ -572,8 +385,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    if (r300->rs_block_state.dirty ||
-        r300->vertex_stream_state.dirty) { /* XXX put updating PSC out of this file */
+    if (r300->rs_block_state.dirty) {
         r300_update_derived_shader_state(r300);
     }
 
-- 
cgit v1.2.3


From a73fd447d4bb3d509fedf52b18a50fccab618298 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Tue, 9 Mar 2010 07:30:27 -0800
Subject: cso: Fix typo in assert.

---
 src/gallium/auxiliary/cso_cache/cso_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 292e489312..f8cb01467c 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1248,7 +1248,7 @@ enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
 
 void cso_save_vertex_elements(struct cso_context *ctx)
 {
-   assert(!ctx->velements);
+   assert(!ctx->velements_saved);
    ctx->velements_saved = ctx->velements;
 }
 
-- 
cgit v1.2.3


From 09a7c25e7661887be31f3cd340010b99b407ef16 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 7 Mar 2010 01:24:57 +0000
Subject: util: Use bitshift arithmetic for pack too.

---
 src/gallium/auxiliary/util/u_format_pack.py | 77 ++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index b49039db39..409d024c63 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -418,31 +418,70 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
 
     dst_native_type = native_type(format)
 
+    assert format.layout == PLAIN
+
+    inv_swizzle = format.inv_swizzles()
+    
     print 'static INLINE void'
     print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type)
     print '{'
-    print '   union util_format_%s pixel;' % format.short_name()
-
-    assert format.layout == PLAIN
+    
+    if format.is_bitmask():
+        depth = format.block_size()
+        print '   uint%u_t value = 0;' % depth 
 
-    inv_swizzle = format.inv_swizzles()
+        shift = 0
+        for i in range(4):
+            dst_channel = format.channels[i]
+            if inv_swizzle[i] is not None:
+                value = 'rgba'[inv_swizzle[i]]
+                value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+                if format.colorspace == ZS:
+                    if i == 3:
+                        value = get_one(dst_channel)
+                    elif i >= 1:
+                        value = '0'
+                if dst_channel.type in (UNSIGNED, SIGNED):
+                    if shift + dst_channel.size < depth:
+                        value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1)
+                    if dst_channel.type == SIGNED:
+                        # Cast to unsigned before shifting: '<<' on a negative signed value is undefined in C
+                        value = '(uint%u_t)(%s) ' % (depth, value)
+                    if shift:
+                        value = '(%s) << %u' % (value, shift)
+                else:
+                    value = None
+                if value is not None:
+                    print '   value |= %s;' % (value)
+                
+            shift += dst_channel.size
 
-    for i in range(4):
-        dst_channel = format.channels[i]
-        width = dst_channel.size
-        if inv_swizzle[i] is None:
-            continue
-        value = 'rgba'[inv_swizzle[i]]
-        value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
-        if format.colorspace == ZS:
-            if i == 3:
-                value = get_one(dst_channel)
-            elif i >= 1:
-                value = '0'
-        print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+        print '#ifdef PIPE_ARCH_BIG_ENDIAN'
+        print '   value = util_bswap%u(value);' % depth
+        print '#endif'
+        
+        print '   *(uint%u_t *)dst = value;' % depth 
 
-    bswap_format(format)
-    print '   memcpy(dst, &pixel, sizeof pixel);'
+    else:
+        print '   union util_format_%s pixel;' % format.short_name()
+    
+        for i in range(4):
+            dst_channel = format.channels[i]
+            width = dst_channel.size
+            if inv_swizzle[i] is None:
+                continue
+            value = 'rgba'[inv_swizzle[i]]
+            value = conversion_expr(src_channel, dst_channel, dst_native_type, value)
+            if format.colorspace == ZS:
+                if i == 3:
+                    value = get_one(dst_channel)
+                elif i >= 1:
+                    value = '0'
+            print '   pixel.chan.%s = %s;' % (dst_channel.name, value)
+    
+        bswap_format(format)
+        print '   memcpy(dst, &pixel, sizeof pixel);'
+        
     print '}'
     print
     
-- 
cgit v1.2.3


From e0f10a6fca34619f61c3533b68d89dcb1ff2627c Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 17:27:45 +0100
Subject: util: simplify util_format_get_nr_components helper

since the number of components is now stored in the pipe format description,
we obviously no longer need to figure this out ourselves.
---
 src/gallium/auxiliary/util/u_format.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 20fdaca007..c08fdcafcc 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -423,14 +423,7 @@ static INLINE unsigned
 util_format_get_nr_components(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
-   unsigned chan;
-   unsigned nr_comp = 0;
-   for (chan = 0; chan < 4; chan++) {
-      if (desc->channel[chan].size != 0) {
-         nr_comp++;
-      }
-   }
-   return nr_comp;
+   return desc->nr_channels;
 }
 
 /*
-- 
cgit v1.2.3


From 2ccae040a458ad0f95ee46916e2ea467d5cf9d02 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 9 Mar 2010 09:37:50 -0700
Subject: gallivm: checkpoint: nearest mipmap filtering

The LOD is computed from texcoord partial derivatives and used to
select a mipmap level.  Still some bugs in texel fetching.  Lots of
rough edges and unfinished parts but the basics are in place.

Lots of changes to the lp_bld_arit.c code to support non-vector/scalar
datatypes.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c       | 143 +++++-
 src/gallium/auxiliary/gallivm/lp_bld_const.c      |   8 +-
 src/gallium/auxiliary/gallivm/lp_bld_logic.c      | 100 ++--
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 592 +++++++++++++---------
 src/gallium/auxiliary/gallivm/lp_bld_type.h       |  50 +-
 5 files changed, 601 insertions(+), 292 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 32f9e5201c..e2c6788397 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -644,13 +644,26 @@ lp_build_abs(struct lp_build_context *bld,
 
    if(type.floating) {
       /* Mask out the sign bit */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      unsigned long long absMask = ~(1ULL << (type.width - 1));
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-      return a;
+      if (type.length == 1) {
+         LLVMTypeRef int_type = LLVMIntType(type.width);
+         LLVMTypeRef float_type = LLVMFloatType();
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+         a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+         return a;
+      }
+      else {
+         /* vector of floats */
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+         return a;
+      }
    }
 
    if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -753,7 +766,7 @@ lp_build_set_sign(struct lp_build_context *bld,
 
 
 /**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
  */
 LLVMValueRef
 lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +777,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
    assert(type.floating);
    /*assert(lp_check_value(type, a));*/
 
-   {
+   if (type.length == 1) {
+      LLVMTypeRef float_type = LLVMFloatType();
+      return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+   }
+   else {
       LLVMTypeRef vec_type = lp_build_vec_type(type);
       /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
       LLVMValueRef res;
@@ -921,12 +938,18 @@ lp_build_itrunc(struct lp_build_context *bld,
                 LLVMValueRef a)
 {
    const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 
    assert(type.floating);
-   assert(lp_check_value(type, a));
 
-   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   if (type.length == 1) { /* scalar; fptrunc is FP->FP only, so use fptosi */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      return LLVMBuildFPToSI(bld->builder, a, int_type, "");
+   }
+   else {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      assert(lp_check_value(type, a));
+      return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   }
 }
 
 
@@ -939,6 +962,15 @@ lp_build_iround(struct lp_build_context *bld,
    LLVMValueRef res;
 
    assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      /* XXX we want rounding here! */
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
    assert(lp_check_value(type, a));
 
    if(util_cpu_caps.has_sse4_1) {
@@ -1207,6 +1239,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                     unsigned num_coeffs)
 {
    const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
    LLVMValueRef res = NULL;
    unsigned i;
 
@@ -1216,7 +1249,13 @@ lp_build_polynomial(struct lp_build_context *bld,
                    __FUNCTION__);
 
    for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+      LLVMValueRef coeff;
+
+      if (type.length == 1)
+         coeff = LLVMConstReal(float_type, coeffs[i]);
+      else
+         coeff = lp_build_const_scalar(type, coeffs[i]);
+
       if(res)
          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
       else
@@ -1410,11 +1449,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
 }
 
 
+/** Scalar (type.length == 1) version of lp_build_log2_approx(). */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+                           LLVMValueRef x,
+                           LLVMValueRef *p_exp,
+                           LLVMValueRef *p_floor_log2,
+                           LLVMValueRef *p_log2)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
+   LLVMTypeRef int_type = LLVMIntType(type.width);
+
+   LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+   LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
+
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                      __FUNCTION__);
+
+      assert(type.floating && type.width == 32);
+
+      i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+      /* exp = (float) exponent(x) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+      LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+      logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+      logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+   }
+
+   if(p_log2) {
+      /* mant = (float) mantissa(x) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
+
+   if(p_exp)
+      *p_exp = exp;
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
 LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef x)
 {
    LLVMValueRef res;
-   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   if (bld->type.length == 1) {
+      lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+   }
+   else {
+      lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   }
    return res;
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
index c8eaa8c394..53447757e8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -264,10 +264,16 @@ lp_build_one(struct lp_type type)
    for(i = 1; i < type.length; ++i)
       elems[i] = elems[0];
 
-   return LLVMConstVector(elems, type.length);
+   if (type.length == 1)
+      return elems[0];
+   else
+      return LLVMConstVector(elems, type.length);
 }
                
 
+/**
+ * Build constant-valued vector from a scalar value.
+ */
 LLVMValueRef
 lp_build_const_scalar(struct lp_type type,
                       double val)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 2726747eae..7c585fda78 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -198,7 +198,7 @@ lp_build_compare(LLVMBuilderRef builder,
 
          return res;
       }
-   }
+   } /* if (type.width * type.length == 128) */
 #endif
 
    if(type.floating) {
@@ -238,20 +238,25 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildFCmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildFCmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildFCmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -286,20 +291,26 @@ lp_build_compare(LLVMBuilderRef builder,
       cond = LLVMBuildICmp(builder, op, a, b, "");
       res = LLVMBuildSelect(builder, cond, ones, zeros, "");
 #else
-      debug_printf("%s: warning: using slow element-wise int vector comparison\n",
-                   __FUNCTION__);
       res = LLVMGetUndef(int_vec_type);
-      for(i = 0; i < type.length; ++i) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-         cond = LLVMBuildICmp(builder, op,
-                              LLVMBuildExtractElement(builder, a, index, ""),
-                              LLVMBuildExtractElement(builder, b, index, ""),
-                              "");
-         cond = LLVMBuildSelect(builder, cond,
-                                LLVMConstExtractElement(ones, index),
-                                LLVMConstExtractElement(zeros, index),
-                                "");
-         res = LLVMBuildInsertElement(builder, res, cond, index, "");
+      if (type.length == 1) {
+         res = LLVMBuildICmp(builder, op, a, b, "");
+      }
+      else {
+         debug_printf("%s: warning: using slow element-wise int"
+                      " vector comparison\n", __FUNCTION__);
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
       }
 #endif
    }
@@ -339,26 +350,31 @@ lp_build_select(struct lp_build_context *bld,
    if(a == b)
       return a;
 
-   if(type.floating) {
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+   if (type.length == 1) {
+      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
    }
+   else {
+      if(type.floating) {
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+      }
 
-   a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
 
-   /* This often gets translated to PANDN, but sometimes the NOT is
-    * pre-computed and stored in another constant. The best strategy depends
-    * on available registers, so it is not a big deal -- hopefully LLVM does
-    * the right decision attending the rest of the program.
-    */
-   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM does
+       * the right decision attending the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
 
-   res = LLVMBuildOr(bld->builder, a, b, "");
+      res = LLVMBuildOr(bld->builder, a, b, "");
 
-   if(type.floating) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      if(type.floating) {
+         LLVMTypeRef vec_type = lp_build_vec_type(type);
+         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+      }
    }
 
    return res;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1dca29cdd5..a965d394f4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -65,6 +65,14 @@ struct lp_build_sample_context
 
    const struct util_format_description *format_desc;
 
+   /** regular scalar float type */
+   struct lp_type float_type;
+   struct lp_build_context float_bld;
+
+   /** regular scalar int type */
+   struct lp_type int_type;
+   struct lp_build_context int_bld;
+
    /** Incoming coordinates type and build context */
    struct lp_type coord_type;
    struct lp_build_context coord_bld;
@@ -108,6 +116,27 @@ wrap_mode_uses_border_color(unsigned mode)
 }
 
 
+static LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+                          LLVMValueRef data_array, LLVMValueRef level)
+{
+   LLVMValueRef indexes[2], data_ptr;
+   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);   /* first GEP index: constant 0 */
+   indexes[1] = level;   /* second GEP index: the requested level */
+   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");   /* address of element [0][level] of data_array */
+   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");   /* load the value stored in that element */
+   return data_ptr;
+}
+
+
+static LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+                                LLVMValueRef data_array, int level)
+{
+   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);   /* turn the C int into an i32 constant */
+   return lp_build_get_mipmap_level(bld, data_array, lvl);   /* then reuse the LLVMValueRef-level variant */
+}
+
 
 /**
  * Gen code to fetch a texel from a texture at int coords (x, y).
@@ -124,14 +153,13 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                           LLVMValueRef x,
                           LLVMValueRef y,
                           LLVMValueRef y_stride,
-                          LLVMValueRef data_array,
+                          LLVMValueRef data_ptr,
                           LLVMValueRef *texel)
 {
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
    LLVMValueRef packed;
    LLVMValueRef use_border = NULL;
-   LLVMValueRef data_ptr;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
@@ -154,16 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
-
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -233,17 +251,8 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
    assert(bld->format_desc->block.height == 1);
    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
-   /* XXX always use mipmap level 0 for now */
-   {
-      const int level = 0;
-      LLVMValueRef indexes[2];
-      /* get data_ptr[level] */
-      indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      indexes[1] = LLVMConstInt(LLVMInt32Type(), level, 0);
-      data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
-      /* load texture base address */
-      data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
-   }
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
    return lp_build_gather(bld->builder,
                           bld->texel_type.length,
@@ -733,7 +742,210 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 
 
 /**
- * Sample 2D texture with nearest filtering.
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+                LLVMValueRef base_size,
+                LLVMValueRef level)
+{
+   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");   /* base_size >> level (arithmetic shift) */
+   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);   /* max(size, one) in the int_coord_bld context */
+   return size;
+}
+
+
+static int
+texture_dims(enum pipe_texture_target tex)
+{
+   switch (tex) {
+   case PIPE_TEXTURE_1D:
+      return 1;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:   /* cube targets are reported as 2, same as 2D */
+      return 2;
+   case PIPE_TEXTURE_3D:
+      return 3;
+   default:
+      assert(0 && "bad texture target in texture_dims()");
+      return 2;   /* fallback value if execution continues past the assert */
+   }
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda); the result is the biased LOD clamped to [min_lod, max_lod].
+ * \param s  vector of texcoord s values
+ * \param t  vector of texcoord t values (only read when dims > 1)
+ * \param r  vector of texcoord r values (only read when dims > 2)
+ * \param width  scalar int texture width
+ * \param height  scalar int texture height (only read when dims > 1)
+ * \param depth  scalar int texture depth (only read when dims > 2)
+ */
+static LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
+                      LLVMValueRef width,
+                      LLVMValueRef height,
+                      LLVMValueRef depth)
+
+{
+   const int dims = texture_dims(bld->static_state->target);
+   struct lp_build_context *coord_bld = &bld->coord_bld;   /* NOTE(review): not referenced anywhere below */
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
+   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
+
+   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+   LLVMValueRef s0, s1, s2;
+   LLVMValueRef t0, t1, t2;
+   LLVMValueRef r0, r1, r2;
+   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+   LLVMValueRef rho, lod;
+
+   /*
+    * dsdx = abs(s[1] - s[0]);
+    * dsdy = abs(s[2] - s[0]);
+    * dtdx = abs(t[1] - t[0]);
+    * dtdy = abs(t[2] - t[0]);
+    * drdx = abs(r[1] - r[0]);
+    * drdy = abs(r[2] - r[0]);
+    * XXX we're assuming a four-element quad in 2x2 layout here.
+    */
+   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+   dsdx = lp_build_abs(float_bld, dsdx);
+   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+   dsdy = lp_build_abs(float_bld, dsdy);
+   if (dims > 1) {
+      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+      dtdx = lp_build_abs(float_bld, dtdx);
+      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+      dtdy = lp_build_abs(float_bld, dtdy);
+      if (dims > 2) {
+         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+         drdx = lp_build_abs(float_bld, drdx);
+         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+         drdy = lp_build_abs(float_bld, drdy);
+      }
+   }
+
+   /* Compute rho = max of all partial derivatives scaled by texture size.
+    * XXX this could be vectorized somewhat
+    */
+   rho = LLVMBuildMul(bld->builder,
+                      lp_build_max(float_bld, dsdx, dsdy),
+                      lp_build_int_to_float(float_bld, width), "");
+   if (dims > 1) {
+      LLVMValueRef max;
+      max = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dtdx, dtdy),
+                         lp_build_int_to_float(float_bld, height), "");
+      rho = lp_build_max(float_bld, rho, max);
+      if (dims > 2) {
+         max = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, drdx, drdy),
+                            lp_build_int_to_float(float_bld, depth), "");
+         rho = lp_build_max(float_bld, rho, max);
+      }
+   }
+
+   /* compute lod = log2(rho) */
+   lod = lp_build_log2(float_bld, rho);
+
+   /* add lod bias (constant taken from static_state->lod_bias) */
+   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+
+   /* clamp lod to [static_state->min_lod, static_state->max_lod] */
+   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+   return lod;
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod  scalar float texture level of detail
+ * \param level_out  returns integer mipmap level, clamped to [0, last_level]
+ */
+static void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level_out)
+{
+   struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *int_bld = &bld->int_bld;
+   LLVMValueRef last_level, level;
+
+   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);   /* queried through the dynamic-state callback for 'unit' */
+
+   /* convert float lod to integer */
+   level = lp_build_iround(float_bld, lod);
+
+   /* clamp level to legal range of levels */
+   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to two adjacent integer
+ * mipmap level indexes (both clamped to [0, last_level]) and the fractional
+ * weight for interpolating between the samples taken from those two levels.
+ */
+static void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod,
+                           LLVMValueRef *level0_out,
+                           LLVMValueRef *level1_out,
+                           LLVMValueRef *weight_out)
+{
+   struct lp_build_context *coord_bld = &bld->coord_bld;   /* NOTE(review): coord contexts used here; confirm 'lod' matches coord_type */
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+   LLVMValueRef last_level, level;
+
+   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+                                               bld->builder, unit);
+
+   /* convert float lod to integer */
+   level = lp_build_ifloor(coord_bld, lod);
+
+   /* compute level 0 and clamp to legal range of levels */
+   *level0_out = lp_build_clamp(int_coord_bld, level,
+                                int_coord_bld->zero,
+                                last_level);
+   /* level 1 = level 0 + 1, capped at last_level (level 0 is already >= 0) */
+   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
+   *level1_out = lp_build_min(int_coord_bld, *level1_out, last_level);
+
+   *weight_out = lp_build_fract(coord_bld, lod);
+}
+
+
+
+/**
+ * Sample 2D texture with nearest filtering, no mipmapping.
  */
 static void
 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
@@ -746,6 +958,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                                LLVMValueRef *texel)
 {
    LLVMValueRef x, y;
+   LLVMValueRef data_ptr;
 
    x = lp_build_sample_wrap_nearest(bld, s, width,
                                     bld->static_state->pot_width,
@@ -757,7 +970,63 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    lp_build_name(x, "tex.x.wrapped");
    lp_build_name(y, "tex.y.wrapped");
 
-   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_array, texel);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
+}
+
+
+/**
+ * Sample 2D texture with nearest filtering, nearest mipmap (the data fetch below still reads level 0).
+ */
+static void
+lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
+                                           unsigned unit,
+                                           LLVMValueRef s,
+                                           LLVMValueRef t,
+                                           LLVMValueRef width,
+                                           LLVMValueRef height,
+                                           LLVMValueRef width_vec,
+                                           LLVMValueRef height_vec,
+                                           LLVMValueRef stride,
+                                           LLVMValueRef data_array,
+                                           LLVMValueRef *texel)
+{
+   LLVMValueRef x, y;
+   LLVMValueRef lod, ilevel, ilevel_vec;
+   LLVMValueRef data_ptr;
+
+   /* compute float LOD; r and depth are passed as NULL on this 2D path */
+   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
+
+   /* convert LOD to int */
+   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
+
+   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);   /* broadcast ilevel using the int_coord_bld context */
+
+   /* compute width_vec, height_vec and stride at mipmap level 'ilevel' */
+   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
+   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
+   stride = lp_build_minify(bld, stride, ilevel_vec);
+
+   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+                                    bld->static_state->pot_width,
+                                    bld->static_state->wrap_s);
+   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+                                    bld->static_state->pot_height,
+                                    bld->static_state->wrap_t);
+
+   lp_build_name(x, "tex.x.wrapped");
+   lp_build_name(y, "tex.y.wrapped");
+
+   /* get pointer to mipmap data: the [ilevel] lookup is disabled by if (0), so level 0 is always used */
+   if (0)
+      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
+   else
+      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
 }
 
 
@@ -779,6 +1048,7 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    LLVMValueRef x0, x1;
    LLVMValueRef y0, y1;
    LLVMValueRef neighbors[2][2][4];
+   LLVMValueRef data_ptr;
    unsigned chan;
 
    lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
@@ -786,10 +1056,13 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
                                bld->static_state->wrap_t, &y0, &y1, &t_fpart);
 
-   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_array, neighbors[0][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_array, neighbors[0][1]);
-   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_array, neighbors[1][0]);
-   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_array, neighbors[1][1]);
+   /* get pointer to mipmap level 0 data */
+   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
+
+   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -857,7 +1130,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef packed, packed_lo, packed_hi;
    LLVMValueRef unswizzled[4];
 
-   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 
@@ -1066,194 +1339,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
-static int
-texture_dims(enum pipe_texture_target tex)
-{
-   switch (tex) {
-   case PIPE_TEXTURE_1D:
-      return 1;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_CUBE:
-      return 2;
-   case PIPE_TEXTURE_3D:
-      return 3;
-   default:
-      assert(0 && "bad texture target in texture_dims()");
-      return 2;
-   }
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param s  vector of texcoord s values
- * \param t  vector of texcoord t values
- * \param r  vector of texcoord r values
- * \param width  scalar int texture width
- * \param height  scalar int texture height
- * \param depth  scalar int texture depth
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
-                      LLVMValueRef s,
-                      LLVMValueRef t,
-                      LLVMValueRef r,
-                      LLVMValueRef width,
-                      LLVMValueRef height,
-                      LLVMValueRef depth)
-
-{
-   const int dims = texture_dims(bld->static_state->target);
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-
-   LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type,
-                                                 bld->static_state->lod_bias);
-   LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->min_lod);
-   LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type,
-                                                bld->static_state->max_lod);
-
-   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-   LLVMValueRef s0, s1, s2;
-   LLVMValueRef t0, t1, t2;
-   LLVMValueRef r0, r1, r2;
-   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-   LLVMValueRef rho, lod;
-
-   /*
-    * dsdx = abs(s[1] - s[0]);
-    * dsdy = abs(s[2] - s[0]);
-    * dtdx = abs(t[1] - t[0]);
-    * dtdy = abs(t[2] - t[0]);
-    * drdx = abs(r[1] - r[0]);
-    * drdy = abs(r[2] - r[0]);
-    * XXX we're assuming a four-element quad in 2x2 layout here.
-    */
-   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-   dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0));
-   dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0));
-   if (dims > 1) {
-      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-      dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0));
-      dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0));
-      if (dims > 2) {
-         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-         drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0));
-         drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0));
-      }
-   }
-
-   /* Compute rho = max of all partial derivatives scaled by texture size.
-    * XXX this can be vectorized somewhat
-    */
-   rho = lp_build_mul(coord_bld,
-                       lp_build_max(coord_bld, dsdx, dsdy),
-                       lp_build_int_to_float(coord_bld, width));
-   if (dims > 1) {
-      LLVMValueRef max;
-      max = lp_build_mul(coord_bld,
-                         lp_build_max(coord_bld, dtdx, dtdy),
-                         lp_build_int_to_float(coord_bld, height));
-      rho = lp_build_max(coord_bld, rho, max);
-      if (dims > 2) {
-         max = lp_build_mul(coord_bld,
-                            lp_build_max(coord_bld, drdx, drdy),
-                            lp_build_int_to_float(coord_bld, depth));
-         rho = lp_build_max(coord_bld, rho, max);
-      }
-   }
-
-   /* compute lod = log2(rho) */
-   lod = lp_build_log2(coord_bld, rho);
-
-   /* add lod bias */
-   lod = lp_build_add(coord_bld, lod, lod_bias);
-
-   /* clamp lod */
-   lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod);
-
-   return lod;
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * \param lod  scalar float texture level of detail
- * \param level_out  returns integer 
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_iround(coord_bld, lod);
-
-   /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(int_coord_bld, level,
-                               int_coord_bld->zero,
-                               last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes.  Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
-                           unsigned unit,
-                           LLVMValueRef lod,
-                           LLVMValueRef *level0_out,
-                           LLVMValueRef *level1_out,
-                           LLVMValueRef *weight_out)
-{
-   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef last_level, level;
-
-   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
-                                               bld->builder, unit);
-
-   /* convert float lod to integer */
-   level = lp_build_ifloor(coord_bld, lod);
-
-   /* compute level 0 and clamp to legal range of levels */
-   *level0_out = lp_build_clamp(int_coord_bld, level,
-                                int_coord_bld->zero,
-                                last_level);
-   /* compute level 1 and clamp to legal range of levels */
-   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
-   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
-
-   *weight_out = lp_build_fract(coord_bld, lod);
-}
-
-
-
 /**
  * Build texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
+ * \param type  vector float type to use for coords, etc.
  */
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1267,17 +1357,19 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     LLVMValueRef *texel)
 {
    struct lp_build_sample_context bld;
-   LLVMValueRef width;
-   LLVMValueRef height;
-   LLVMValueRef stride;
+   LLVMValueRef width, width_vec;
+   LLVMValueRef height, height_vec;
+   LLVMValueRef stride, stride_vec;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
+   boolean done = FALSE;
 
    (void) lp_build_lod_selector;   /* temporary to silence warning */
    (void) lp_build_nearest_mip_level;
    (void) lp_build_linear_mip_levels;
+   (void) lp_build_minify;
 
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
@@ -1285,10 +1377,16 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    bld.static_state = static_state;
    bld.dynamic_state = dynamic_state;
    bld.format_desc = util_format_description(static_state->format);
+
+   bld.float_type = lp_type_float(32);
+   bld.int_type = lp_type_int(32);
    bld.coord_type = type;
    bld.uint_coord_type = lp_uint_type(type);
    bld.int_coord_type = lp_int_type(type);
    bld.texel_type = type;
+
+   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
@@ -1305,30 +1403,56 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    t = coords[1];
    r = coords[2];
 
-   width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
-   height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
-   stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
+   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
+   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
+   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
 
    if(static_state->target == PIPE_TEXTURE_1D)
       t = bld.coord_bld.zero;
 
-   switch (static_state->min_img_filter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
-                                     stride, data_array, texel);
+   switch (static_state->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_NONE:
       break;
-   case PIPE_TEX_FILTER_LINEAR:
-      if(lp_format_is_rgba8(bld.format_desc) &&
-         is_simple_wrap_mode(static_state->wrap_s) &&
-         is_simple_wrap_mode(static_state->wrap_t))
-         lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
-                                       stride, data_array, texel);
-      else
-         lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
-                                       stride, data_array, texel);
+   case PIPE_TEX_MIPFILTER_NEAREST:
+
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
+                                                    s, t,
+                                                    width, height,
+                                                    width_vec, height_vec,
+                                                    stride_vec,
+                                                    data_array, texel);
+         done = TRUE;
+         break;
+      }
+
+      break;
+   case PIPE_TEX_MIPFILTER_LINEAR:
       break;
    default:
-      assert(0);
+      assert(0 && "invalid mip filter");
+   }
+
+   if (!done) {
+      switch (static_state->min_img_filter) {
+      case PIPE_TEX_FILTER_NEAREST:
+         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
+                                        stride_vec, data_array, texel);
+         break;
+      case PIPE_TEX_FILTER_LINEAR:
+         if(lp_format_is_rgba8(bld.format_desc) &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t))
+            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         else
+            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
+                                          stride_vec, data_array, texel);
+         break;
+      default:
+         assert(0);
+      }
    }
 
    /* FIXME: respect static_state->min_mip_filter */;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 16946cc28a..4daa904e63 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -103,7 +103,7 @@ struct lp_type {
    unsigned width:14;
 
    /**
-    * Vector length.
+    * Vector length.  If length==1, this is a scalar (float/int) type.
     *
     * width*length should be a power of two greater or equal to eight.
     *
@@ -139,11 +139,28 @@ struct lp_build_context
 };
 
 
+/** Create scalar float type */
 static INLINE struct lp_type
 lp_type_float(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector of float type */
+static INLINE struct lp_type
+lp_type_float_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.floating = TRUE;
    res_type.sign = TRUE;
@@ -154,11 +171,27 @@ lp_type_float(unsigned width)
 }
 
 
+/** Create scalar int type */
 static INLINE struct lp_type
 lp_type_int(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector int type */
+static INLINE struct lp_type
+lp_type_int_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.sign = TRUE;
    res_type.width = width;
@@ -168,11 +201,26 @@ lp_type_int(unsigned width)
 }
 
 
+/** Create scalar uint type */
 static INLINE struct lp_type
 lp_type_uint(unsigned width)
 {
    struct lp_type res_type;
 
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
+   res_type.length = 1;
+
+   return res_type;
+}
+
+
+/** Create vector uint type */
+static INLINE struct lp_type
+lp_type_uint_vec(unsigned width)
+{
+   struct lp_type res_type;
+
    memset(&res_type, 0, sizeof res_type);
    res_type.width = width;
    res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
-- 
cgit v1.2.3


From bf50d40f1ba68a0f19ca06196af99c01efce1e76 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 9 Mar 2010 20:12:52 +0100
Subject: r300g: kill pf_component_count

---
 src/gallium/drivers/r300/r300_state_inlines.h | 29 +--------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index af7827820c..a32924ed0a 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -348,39 +348,12 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
     return 0;
 }
 
-/* Utility function to count the number of components in RGBAZS formats.
- * XXX should go to util or p_format.h */
-static INLINE unsigned pf_component_count(enum pipe_format format) {
-    unsigned count = 0;
-
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) {
-        count++;
-    }
-    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
-        count++;
-    }
-
-    return count;
-}
-
 /* Translate pipe_formats into PSC vertex types. */
 static INLINE uint16_t
 r300_translate_vertex_data_type(enum pipe_format format) {
     uint32_t result = 0;
     const struct util_format_description *desc;
-    unsigned components = pf_component_count(format);
+    unsigned components = util_format_get_nr_components(format);
 
     desc = util_format_description(format);
 
-- 
cgit v1.2.3


From 0d6b0b0d9d5257cc8fb95786b6cd77d088bdb35e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 19:05:32 +0100
Subject: cso: don't forget to release vertex elements state

---
 src/gallium/auxiliary/cso_cache/cso_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index f8cb01467c..6500891a10 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -289,6 +289,7 @@ void cso_release_all( struct cso_context *ctx )
       ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
       ctx->pipe->bind_fs_state( ctx->pipe, NULL );
       ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
    }
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-- 
cgit v1.2.3


From a673dee4553b6f1649d4cc31f7f7849058411c49 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 9 Mar 2010 20:31:40 +0100
Subject: mesa/st: initialize velements state properly

One field is a bitfield whose remaining bits are never written to, which
caused valgrind to complain. This might also have caused cso to not recognize
an already stored state. Reported by Christoph Bumiller.
---
 src/mesa/state_tracker/st_draw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 4d2e39108d..8a6e1ed466 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -563,6 +563,7 @@ st_draw_vbo(GLcontext *ctx,
    (void) check_uniforms;
 #endif
 
+   memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
    /*
     * Setup the vbuffer[] and velements[] arrays.
     */
-- 
cgit v1.2.3


From dc1e4cf87fdab5a57049fbf4180e3597d80323cd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 9 Mar 2010 19:14:34 +0100
Subject: svga: Don't do culling while the software pipeline is active.

The software pipeline does the culling for us, and additionally
introduces potentially back-facing triangles.
---
 src/gallium/drivers/svga/svga_state_rss.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index 107cc403b4..b7195d246b 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -191,15 +191,24 @@ static int emit_rss( struct svga_context *svga,
       EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail );
    }
 
-   if (dirty & SVGA_NEW_RAST)
+   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE))
    {
       const struct svga_rasterizer_state *curr = svga->curr.rast; 
+      unsigned cullmode = curr->cullmode;
 
       /* Shademode: still need to rearrange index list to move
        * flat-shading PV first vertex.
        */
       EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
-      EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+
+      /* Don't do culling while the software pipeline is active.  It
+       * does it for us, and additionally introduces potentially
+       * back-facing triangles.
+       */
+      if (svga->state.sw.need_pipeline)
+         cullmode = SVGA3D_FACE_NONE;
+
+      EMIT_RS( svga, cullmode, CULLMODE, fail );
       EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
       EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
       EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
-- 
cgit v1.2.3


From fe14868d96d4820dba73c3a507d191b8a73c6870 Mon Sep 17 00:00:00 2001
From: George Sapountzis <gsapountzis@gmail.com>
Date: Tue, 9 Mar 2010 22:03:24 +0200
Subject: drop stray XFree86Server, XGLServer

---
 src/gallium/include/pipe/p_compiler.h | 5 -----
 src/mesa/glapi/glapi_getproc.c        | 8 ++++----
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index b93b38310a..e2766d15cd 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -31,13 +31,8 @@
 
 #include "p_config.h"
 
-#ifndef XFree86Server
 #include <stdlib.h>
 #include <string.h>
-#else
-#include "xf86_ansic.h"
-#include "xf86_libc.h"
-#endif
 #include <stddef.h>
 #include <stdarg.h>
 
diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
index a6dbf173e8..2b89a8f4f4 100644
--- a/src/mesa/glapi/glapi_getproc.c
+++ b/src/mesa/glapi/glapi_getproc.c
@@ -53,7 +53,7 @@
 # endif
 #endif
 
-#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server)
 # define NEED_FUNCTION_POINTER
 #endif
 
@@ -112,7 +112,7 @@ extern const GLubyte gl_dispatch_functions_start[];
 #endif /* USE_X86_ASM */
 
 
-#if !defined(XFree86Server) && !defined(XGLServer)
+#if !defined(XFree86Server)
 
 /**
  * Return dispatch function address for the named static (built-in) function.
@@ -140,7 +140,7 @@ get_static_proc_address(const char *funcName)
    }
 }
 
-#endif /* !defined(XFree86Server) && !defined(XGLServer) */
+#endif /* !defined(XFree86Server) */
 
 
@@ -724,7 +724,7 @@ _glapi_get_proc_address(const char *funcName)
       }
    }
 
-#if !defined( XFree86Server ) && !defined( XGLServer )
+#if !defined( XFree86Server )
    /* search static functions */
    {
       const _glapi_proc func = get_static_proc_address(funcName);
-- 
cgit v1.2.3


From 2c8b5ffed9a787e896b540a95be48ef401d5f007 Mon Sep 17 00:00:00 2001
From: George Sapountzis <gsapountzis@gmail.com>
Date: Tue, 9 Mar 2010 22:03:24 +0200
Subject: glapi: mv table functions to glapi_getproc.c and add local header

---
 src/mesa/glapi/glapi.c         | 44 +--------------------------------
 src/mesa/glapi/glapi.h         | 21 ----------------
 src/mesa/glapi/glapi_getproc.c | 56 ++++++++++++++++++++++++++++++++++++++++--
 src/mesa/glapi/glapi_priv.h    | 43 ++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 66 deletions(-)
 create mode 100644 src/mesa/glapi/glapi_priv.h

diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c
index 13de594aaf..ce85cf6a87 100644
--- a/src/mesa/glapi/glapi.c
+++ b/src/mesa/glapi/glapi.c
@@ -59,7 +59,7 @@
 #endif
 
 #include "glapi/glapi.h"
-#include "glapi/glapitable.h"
+#include "glapi/glapi_priv.h"
 
 extern _glapi_proc __glapi_noop_table[];
 
@@ -291,45 +291,3 @@ _glapi_get_dispatch(void)
    return _glapi_Dispatch;
 #endif
 }
-
-
-
-
-/*
- * The dispatch table size (number of entries) is the size of the
- * _glapi_table struct plus the number of dynamic entries we can add.
- * The extra slots can be filled in by DRI drivers that register new extension
- * functions.
- */
-#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
-
-
-/**
- * Return size of dispatch table struct as number of functions (or
- * slots).
- */
-PUBLIC GLuint
-_glapi_get_dispatch_table_size(void)
-{
-   return DISPATCH_TABLE_SIZE;
-}
-
-
-/**
- * Make sure there are no NULL pointers in the given dispatch table.
- * Intended for debugging purposes.
- */
-void
-_glapi_check_table_not_null(const struct _glapi_table *table)
-{
-#if 0 /* enable this for extra DEBUG */
-   const GLuint entries = _glapi_get_dispatch_table_size();
-   const void **tab = (const void **) table;
-   GLuint i;
-   for (i = 1; i < entries; i++) {
-      assert(tab[i]);
-   }
-#else
-   (void) table;
-#endif
-}
diff --git a/src/mesa/glapi/glapi.h b/src/mesa/glapi/glapi.h
index 1ca2e4beff..7dcf2e8910 100644
--- a/src/mesa/glapi/glapi.h
+++ b/src/mesa/glapi/glapi.h
@@ -165,29 +165,8 @@ extern _glapi_proc
 _glapi_get_proc_address(const char *funcName);
 
 
-/**
- * GL API local functions and defines
- */
-
-extern void
-init_glapi_relocs_once(void);
-
-extern void
-_glapi_check_table_not_null(const struct _glapi_table *table);
-
-
-extern void
-_glapi_check_table(const struct _glapi_table *table);
-
-
 extern const char *
 _glapi_get_proc_name(unsigned int offset);
 
 
-/*
- * Number of extension functions which we can dynamically add at runtime.
- */
-#define MAX_EXTENSION_FUNCS 300
-
-
 #endif
diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
index 2b89a8f4f4..fbf48f1388 100644
--- a/src/mesa/glapi/glapi_getproc.c
+++ b/src/mesa/glapi/glapi_getproc.c
@@ -39,8 +39,9 @@
 #endif
 
 #include "glapi/glapi.h"
-#include "glapi/glapioffsets.h"
+#include "glapi/glapi_priv.h"
 #include "glapi/glapitable.h"
+#include "glapi/glapioffsets.h"
 
 
 #if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
@@ -378,6 +379,12 @@ struct _glapi_function {
 };
 
 
+/*
+ * Number of extension functions which we can dynamically add at runtime.
+ */
+#define MAX_EXTENSION_FUNCS 300
+
+
 static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
 static GLuint NumExtEntryPoints = 0;
 
@@ -766,6 +773,51 @@ _glapi_get_proc_name(GLuint offset)
 
 
+/**********************************************************************
+ * GL API table functions.
+ */
+
+
+/*
+ * The dispatch table size (number of entries) is the size of the
+ * _glapi_table struct plus the number of dynamic entries we can add.
+ * The extra slots can be filled in by DRI drivers that register new extension
+ * functions.
+ */
+#define DISPATCH_TABLE_SIZE (sizeof(struct _glapi_table) / sizeof(void *) + MAX_EXTENSION_FUNCS)
+
+
+/**
+ * Return size of dispatch table struct as number of functions (or
+ * slots).
+ */
+PUBLIC GLuint
+_glapi_get_dispatch_table_size(void)
+{
+   return DISPATCH_TABLE_SIZE;
+}
+
+
+/**
+ * Make sure there are no NULL pointers in the given dispatch table.
+ * Intended for debugging purposes.
+ */
+void
+_glapi_check_table_not_null(const struct _glapi_table *table)
+{
+#ifdef EXTRA_DEBUG /* set to DEBUG for extra DEBUG */
+   const GLuint entries = _glapi_get_dispatch_table_size();
+   const void **tab = (const void **) table;
+   GLuint i;
+   for (i = 1; i < entries; i++) {
+      assert(tab[i]);
+   }
+#else
+   (void) table;
+#endif
+}
+
+
 /**
  * Do some spot checks to be sure that the dispatch table
  * slots are assigned correctly. For debugging only.
@@ -773,7 +825,7 @@ _glapi_get_proc_name(GLuint offset)
 void
 _glapi_check_table(const struct _glapi_table *table)
 {
-#if 0 /* enable this for extra DEBUG */
+#ifdef EXTRA_DEBUG /* set to DEBUG for extra DEBUG */
    {
       GLuint BeginOffset = _glapi_get_proc_offset("glBegin");
       char *BeginFunc = (char*) &table->Begin;
diff --git a/src/mesa/glapi/glapi_priv.h b/src/mesa/glapi/glapi_priv.h
new file mode 100644
index 0000000000..05eda99ccf
--- /dev/null
+++ b/src/mesa/glapi/glapi_priv.h
@@ -0,0 +1,43 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _GLAPI_PRIV_H
+#define _GLAPI_PRIV_H
+
+#include "glthread.h"
+
+extern void
+_glapi_check_table_not_null(const struct _glapi_table *table);
+
+
+extern void
+_glapi_check_table(const struct _glapi_table *table);
+
+
+extern void
+init_glapi_relocs_once(void);
+
+
+#endif
-- 
cgit v1.2.3


From 54ba95a4de749de1da73b3917aac99eb1d57d7fa Mon Sep 17 00:00:00 2001
From: George Sapountzis <gsapountzis@gmail.com>
Date: Tue, 9 Mar 2010 22:03:24 +0200
Subject: glapi: split out arch-specific code for entrypoints

---
 src/mesa/glapi/glapi_entrypoint.c | 331 ++++++++++++++++++++++++++++++++++++++
 src/mesa/glapi/glapi_getproc.c    | 297 +---------------------------------
 src/mesa/glapi/glapi_priv.h       |  23 +++
 src/mesa/sources.mak              |   1 +
 4 files changed, 357 insertions(+), 295 deletions(-)
 create mode 100644 src/mesa/glapi/glapi_entrypoint.c

diff --git a/src/mesa/glapi/glapi_entrypoint.c b/src/mesa/glapi/glapi_entrypoint.c
new file mode 100644
index 0000000000..5e6e5995f2
--- /dev/null
+++ b/src/mesa/glapi/glapi_entrypoint.c
@@ -0,0 +1,331 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file glapi_entrypoint.c
+ *
+ * Arch-specific code for manipulating GL API entrypoints (dispatch stubs).
+ */
+
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#include "glapi/mesa.h"
+#else
+#include "main/glheader.h"
+#include "main/compiler.h"
+#endif
+
+#include "glapi/glapi.h"
+#include "glapi/glapi_priv.h"
+
+
+#ifdef USE_X86_ASM
+
+#if defined( GLX_USE_TLS )
+extern       GLubyte gl_dispatch_functions_start[];
+extern       GLubyte gl_dispatch_functions_end[];
+#else
+extern const GLubyte gl_dispatch_functions_start[];
+#endif
+
+#endif /* USE_X86_ASM */
+
+
+#if defined(DISPATCH_FUNCTION_SIZE)
+
+_glapi_proc
+get_entrypoint_address(GLuint functionOffset)
+{
+   return (_glapi_proc) (gl_dispatch_functions_start
+                         + (DISPATCH_FUNCTION_SIZE * functionOffset));
+}
+
+#endif
+
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+
+/**
+ * Perform platform-specific GL API entry-point fixups.
+ */
+static void
+init_glapi_relocs( void )
+{
+#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
+    extern unsigned long _x86_get_dispatch(void);
+    char run_time_patch[] = {
+       0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
+    };
+    GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
+    const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
+    GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
+
+    *offset = _x86_get_dispatch();
+    while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
+	(void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
+	curr_func += DISPATCH_FUNCTION_SIZE;
+    }
+#endif
+#ifdef USE_SPARC_ASM
+    extern void __glapi_sparc_icache_flush(unsigned int *);
+    static const unsigned int template[] = {
+#ifdef GLX_USE_TLS
+	0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
+	0x8730e00a, /* srl %g3, 10, %g3 */
+	0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
+#ifdef __arch64__
+	0xc259c002, /* ldx [%g7 + %g2], %g1 */
+	0xc2584003, /* ldx [%g1 + %g3], %g1 */
+#else
+	0xc201c002, /* ld [%g7 + %g2], %g1 */
+	0xc2004003, /* ld [%g1 + %g3], %g1 */
+#endif
+	0x81c04000, /* jmp %g1 */
+	0x01000000, /* nop  */
+#else
+#ifdef __arch64__
+	0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
+	0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
+	0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
+	0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
+	0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
+	0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
+	0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#else
+	0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
+	0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
+	0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
+#endif
+	0x80a06000, /*             --> cmp %g1, 0 */
+	0x02800005, /*             --> be +4*5 */
+	0x01000000, /*             -->  nop  */
+#ifdef __arch64__
+	0xc2584003, /* 64-bit      --> ldx [%g1 + %g3], %g1 */
+#else
+	0xc2004003, /* 32-bit      --> ld [%g1 + %g3], %g1 */
+#endif
+	0x81c04000, /*             --> jmp %g1 */
+	0x01000000, /*             --> nop  */
+#ifdef __arch64__
+	0x9de3bf80, /* 64-bit      --> save  %sp, -128, %sp */
+#else
+	0x9de3bfc0, /* 32-bit      --> save  %sp, -64, %sp */
+#endif
+	0xa0100003, /*             --> mov  %g3, %l0 */
+	0x40000000, /*             --> call _glapi_get_dispatch */
+	0x01000000, /*             -->  nop */
+	0x82100008, /*             --> mov %o0, %g1 */
+	0x86100010, /*             --> mov %l0, %g3 */
+	0x10bffff7, /*             --> ba -4*9 */
+	0x81e80000, /*             -->  restore  */
+#endif
+    };
+#ifdef GLX_USE_TLS
+    extern unsigned int __glapi_sparc_tls_stub;
+    extern unsigned long __glapi_sparc_get_dispatch(void);
+    unsigned int *code = &__glapi_sparc_tls_stub;
+    unsigned long dispatch = __glapi_sparc_get_dispatch();
+#else
+    extern unsigned int __glapi_sparc_pthread_stub;
+    unsigned int *code = &__glapi_sparc_pthread_stub;
+    unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
+    unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
+    int idx;
+#endif
+
+#if defined(GLX_USE_TLS)
+    code[0] = template[0] | (dispatch >> 10);
+    code[1] = template[1];
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | (dispatch & 0x3ff);
+    code[3] = template[3];
+    __glapi_sparc_icache_flush(&code[2]);
+    code[4] = template[4];
+    code[5] = template[5];
+    __glapi_sparc_icache_flush(&code[4]);
+    code[6] = template[6];
+    __glapi_sparc_icache_flush(&code[6]);
+#else
+#if defined(__arch64__)
+    code[0] = template[0] | (dispatch >> (32 + 10));
+    code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
+    code[3] = template[3];
+    __glapi_sparc_icache_flush(&code[2]);
+    code[4] = template[4];
+    code[5] = template[5];
+    __glapi_sparc_icache_flush(&code[4]);
+    code[6] = template[6] | (dispatch & 0x3ff);
+    idx = 7;
+#else
+    code[0] = template[0] | (dispatch >> 10);
+    code[1] = template[1];
+    __glapi_sparc_icache_flush(&code[0]);
+    code[2] = template[2] | (dispatch & 0x3ff);
+    idx = 3;
+#endif
+    code[idx + 0] = template[idx + 0];
+    __glapi_sparc_icache_flush(&code[idx - 1]);
+    code[idx + 1] = template[idx + 1];
+    code[idx + 2] = template[idx + 2];
+    __glapi_sparc_icache_flush(&code[idx + 1]);
+    code[idx + 3] = template[idx + 3];
+    code[idx + 4] = template[idx + 4];
+    __glapi_sparc_icache_flush(&code[idx + 3]);
+    code[idx + 5] = template[idx + 5];
+    code[idx + 6] = template[idx + 6];
+    __glapi_sparc_icache_flush(&code[idx + 5]);
+    code[idx + 7] = template[idx + 7];
+    code[idx + 8] = template[idx + 8] |
+	    (((call_dest - ((unsigned long) &code[idx + 8]))
+	      >> 2) & 0x3fffffff);
+    __glapi_sparc_icache_flush(&code[idx + 7]);
+    code[idx + 9] = template[idx + 9];
+    code[idx + 10] = template[idx + 10];
+    __glapi_sparc_icache_flush(&code[idx + 9]);
+    code[idx + 11] = template[idx + 11];
+    code[idx + 12] = template[idx + 12];
+    __glapi_sparc_icache_flush(&code[idx + 11]);
+    code[idx + 13] = template[idx + 13];
+    __glapi_sparc_icache_flush(&code[idx + 13]);
+#endif
+#endif
+}
+
+void
+init_glapi_relocs_once( void )
+{
+   static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+   pthread_once( & once_control, init_glapi_relocs );
+}
+
+#else
+
+void
+init_glapi_relocs_once( void ) { }
+
+#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
+
+
+#ifdef USE_SPARC_ASM
+extern void __glapi_sparc_icache_flush(unsigned int *);
+#endif
+
+/**
+ * Generate a dispatch function (entrypoint) which jumps through
+ * the given slot number (offset) in the current dispatch table.
+ * We need assembly language in order to accomplish this.
+ */
+_glapi_proc
+generate_entrypoint(GLuint functionOffset)
+{
+#if defined(USE_X86_ASM)
+   /* 32 is chosen as something of a magic offset.  For x86, the dispatch
+    * at offset 32 is the first one where the offset in the
+    * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
+    */
+   const GLubyte * const template_func = gl_dispatch_functions_start 
+     + (DISPATCH_FUNCTION_SIZE * 32);
+   GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
+
+
+   if ( code != NULL ) {
+      (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
+      fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
+   }
+
+   return (_glapi_proc) code;
+#elif defined(USE_SPARC_ASM)
+
+#if defined(PTHREADS) || defined(GLX_USE_TLS)
+   static const unsigned int template[] = {
+      0x07000000, /* sethi %hi(0), %g3 */
+      0x8210000f, /* mov  %o7, %g1 */
+      0x40000000, /* call */
+      0x9e100001, /* mov  %g1, %o7 */
+   };
+#ifdef GLX_USE_TLS
+   extern unsigned int __glapi_sparc_tls_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
+#else
+   extern unsigned int __glapi_sparc_pthread_stub;
+   unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
+#endif
+   unsigned int *code = (unsigned int *) malloc(sizeof(template));
+   if (code) {
+      code[0] = template[0] | (functionOffset & 0x3fffff);
+      code[1] = template[1];
+      __glapi_sparc_icache_flush(&code[0]);
+      code[2] = template[2] |
+         (((call_dest - ((unsigned long) &code[2]))
+	   >> 2) & 0x3fffffff);
+      code[3] = template[3];
+      __glapi_sparc_icache_flush(&code[2]);
+   }
+   return (_glapi_proc) code;
+#endif
+
+#else
+   (void) functionOffset;
+   return NULL;
+#endif /* USE_*_ASM */
+}
+
+
+/**
+ * This function inserts a new dispatch offset into the assembly language
+ * stub that was generated with the preceding function.
+ */
+void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
+{
+#if defined(USE_X86_ASM)
+   GLubyte * const code = (GLubyte *) entrypoint;
+
+#if DISPATCH_FUNCTION_SIZE == 32
+   *((unsigned int *)(code + 11)) = 4 * offset;
+   *((unsigned int *)(code + 22)) = 4 * offset;
+#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
+   *((unsigned int *)(code +  8)) = 4 * offset;
+#elif DISPATCH_FUNCTION_SIZE == 16
+   *((unsigned int *)(code +  7)) = 4 * offset;
+#else
+# error Invalid DISPATCH_FUNCTION_SIZE!
+#endif
+
+#elif defined(USE_SPARC_ASM)
+   unsigned int *code = (unsigned int *) entrypoint;
+   code[0] &= ~0x3fffff;
+   code[0] |= (offset * sizeof(void *)) & 0x3fffff;
+   __glapi_sparc_icache_flush(&code[0]);
+#else
+
+   /* an unimplemented architecture */
+   (void) entrypoint;
+   (void) offset;
+
+#endif /* USE_*_ASM */
+}
diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
index fbf48f1388..bd930b875d 100644
--- a/src/mesa/glapi/glapi_getproc.c
+++ b/src/mesa/glapi/glapi_getproc.c
@@ -44,16 +44,6 @@
 #include "glapi/glapioffsets.h"
 
 
-#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
-# define DISPATCH_FUNCTION_SIZE  16
-#elif defined(USE_X86_ASM)
-# if defined(THREADS) && !defined(GLX_USE_TLS)
-#  define DISPATCH_FUNCTION_SIZE  32
-# else
-#  define DISPATCH_FUNCTION_SIZE  16
-# endif
-#endif
-
 #if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server)
 # define NEED_FUNCTION_POINTER
 #endif
@@ -101,18 +91,6 @@ get_static_proc_offset(const char *funcName)
 }
 
 
-#ifdef USE_X86_ASM
-
-#if defined( GLX_USE_TLS )
-extern       GLubyte gl_dispatch_functions_start[];
-extern       GLubyte gl_dispatch_functions_end[];
-#else
-extern const GLubyte gl_dispatch_functions_start[];
-#endif
-
-#endif /* USE_X86_ASM */
-
-
 #if !defined(XFree86Server)
 
 /**
@@ -126,12 +104,10 @@ get_static_proc_address(const char *funcName)
    if (f) {
 #if defined(DISPATCH_FUNCTION_SIZE) && defined(GLX_INDIRECT_RENDERING)
       return (f->Address == NULL)
-	 ? (_glapi_proc) (gl_dispatch_functions_start
-			  + (DISPATCH_FUNCTION_SIZE * f->Offset))
+	 ? get_entrypoint_address(f->Offset)
          : f->Address;
 #elif defined(DISPATCH_FUNCTION_SIZE)
-      return (_glapi_proc) (gl_dispatch_functions_start 
-                            + (DISPATCH_FUNCTION_SIZE * f->Offset));
+      return get_entrypoint_address(f->Offset);
 #else
       return f->Address;
 #endif
@@ -163,172 +139,6 @@ get_static_proc_name( GLuint offset )
 
 
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
-
-/**
- * Perform platform-specific GL API entry-point fixups.
- */
-static void
-init_glapi_relocs( void )
-{
-#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT)
-    extern unsigned long _x86_get_dispatch(void);
-    char run_time_patch[] = {
-       0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */
-    };
-    GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */
-    const GLubyte * const get_disp = (const GLubyte *) run_time_patch;
-    GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start;
-
-    *offset = _x86_get_dispatch();
-    while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) {
-	(void) memcpy( curr_func, get_disp, sizeof(run_time_patch));
-	curr_func += DISPATCH_FUNCTION_SIZE;
-    }
-#endif
-#ifdef USE_SPARC_ASM
-    extern void __glapi_sparc_icache_flush(unsigned int *);
-    static const unsigned int template[] = {
-#ifdef GLX_USE_TLS
-	0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */
-	0x8730e00a, /* srl %g3, 10, %g3 */
-	0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */
-#ifdef __arch64__
-	0xc259c002, /* ldx [%g7 + %g2], %g1 */
-	0xc2584003, /* ldx [%g1 + %g3], %g1 */
-#else
-	0xc201c002, /* ld [%g7 + %g2], %g1 */
-	0xc2004003, /* ld [%g1 + %g3], %g1 */
-#endif
-	0x81c04000, /* jmp %g1 */
-	0x01000000, /* nop  */
-#else
-#ifdef __arch64__
-	0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */
-	0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */
-	0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */
-	0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */
-	0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */
-	0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */
-	0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#else
-	0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */
-	0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */
-	0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */
-#endif
-	0x80a06000, /*             --> cmp %g1, 0 */
-	0x02800005, /*             --> be +4*5 */
-	0x01000000, /*             -->  nop  */
-#ifdef __arch64__
-	0xc2584003, /* 64-bit      --> ldx [%g1 + %g3], %g1 */
-#else
-	0xc2004003, /* 32-bit      --> ld [%g1 + %g3], %g1 */
-#endif
-	0x81c04000, /*             --> jmp %g1 */
-	0x01000000, /*             --> nop  */
-#ifdef __arch64__
-	0x9de3bf80, /* 64-bit      --> save  %sp, -128, %sp */
-#else
-	0x9de3bfc0, /* 32-bit      --> save  %sp, -64, %sp */
-#endif
-	0xa0100003, /*             --> mov  %g3, %l0 */
-	0x40000000, /*             --> call _glapi_get_dispatch */
-	0x01000000, /*             -->  nop */
-	0x82100008, /*             --> mov %o0, %g1 */
-	0x86100010, /*             --> mov %l0, %g3 */
-	0x10bffff7, /*             --> ba -4*9 */
-	0x81e80000, /*             -->  restore  */
-#endif
-    };
-#ifdef GLX_USE_TLS
-    extern unsigned int __glapi_sparc_tls_stub;
-    extern unsigned long __glapi_sparc_get_dispatch(void);
-    unsigned int *code = &__glapi_sparc_tls_stub;
-    unsigned long dispatch = __glapi_sparc_get_dispatch();
-#else
-    extern unsigned int __glapi_sparc_pthread_stub;
-    unsigned int *code = &__glapi_sparc_pthread_stub;
-    unsigned long dispatch = (unsigned long) &_glapi_Dispatch;
-    unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch;
-    int idx;
-#endif
-
-#if defined(GLX_USE_TLS)
-    code[0] = template[0] | (dispatch >> 10);
-    code[1] = template[1];
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | (dispatch & 0x3ff);
-    code[3] = template[3];
-    __glapi_sparc_icache_flush(&code[2]);
-    code[4] = template[4];
-    code[5] = template[5];
-    __glapi_sparc_icache_flush(&code[4]);
-    code[6] = template[6];
-    __glapi_sparc_icache_flush(&code[6]);
-#else
-#if defined(__arch64__)
-    code[0] = template[0] | (dispatch >> (32 + 10));
-    code[1] = template[1] | ((dispatch & 0xffffffff) >> 10);
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | ((dispatch >> 32) & 0x3ff);
-    code[3] = template[3];
-    __glapi_sparc_icache_flush(&code[2]);
-    code[4] = template[4];
-    code[5] = template[5];
-    __glapi_sparc_icache_flush(&code[4]);
-    code[6] = template[6] | (dispatch & 0x3ff);
-    idx = 7;
-#else
-    code[0] = template[0] | (dispatch >> 10);
-    code[1] = template[1];
-    __glapi_sparc_icache_flush(&code[0]);
-    code[2] = template[2] | (dispatch & 0x3ff);
-    idx = 3;
-#endif
-    code[idx + 0] = template[idx + 0];
-    __glapi_sparc_icache_flush(&code[idx - 1]);
-    code[idx + 1] = template[idx + 1];
-    code[idx + 2] = template[idx + 2];
-    __glapi_sparc_icache_flush(&code[idx + 1]);
-    code[idx + 3] = template[idx + 3];
-    code[idx + 4] = template[idx + 4];
-    __glapi_sparc_icache_flush(&code[idx + 3]);
-    code[idx + 5] = template[idx + 5];
-    code[idx + 6] = template[idx + 6];
-    __glapi_sparc_icache_flush(&code[idx + 5]);
-    code[idx + 7] = template[idx + 7];
-    code[idx + 8] = template[idx + 8] |
-	    (((call_dest - ((unsigned long) &code[idx + 8]))
-	      >> 2) & 0x3fffffff);
-    __glapi_sparc_icache_flush(&code[idx + 7]);
-    code[idx + 9] = template[idx + 9];
-    code[idx + 10] = template[idx + 10];
-    __glapi_sparc_icache_flush(&code[idx + 9]);
-    code[idx + 11] = template[idx + 11];
-    code[idx + 12] = template[idx + 12];
-    __glapi_sparc_icache_flush(&code[idx + 11]);
-    code[idx + 13] = template[idx + 13];
-    __glapi_sparc_icache_flush(&code[idx + 13]);
-#endif
-#endif
-}
-
-void
-init_glapi_relocs_once( void )
-{
-   static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-   pthread_once( & once_control, init_glapi_relocs );
-}
-
-#else
-
-void
-init_glapi_relocs_once( void ) { }
-
-#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */
-
-
-
 /**********************************************************************
  * Extension function management.
  */
@@ -388,109 +198,6 @@ struct _glapi_function {
 static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
 static GLuint NumExtEntryPoints = 0;
 
-#ifdef USE_SPARC_ASM
-extern void __glapi_sparc_icache_flush(unsigned int *);
-#endif
-
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
-
-/**
- * Generate a dispatch function (entrypoint) which jumps through
- * the given slot number (offset) in the current dispatch table.
- * We need assembly language in order to accomplish this.
- */
-static _glapi_proc
-generate_entrypoint(GLuint functionOffset)
-{
-#if defined(USE_X86_ASM)
-   /* 32 is chosen as something of a magic offset.  For x86, the dispatch
-    * at offset 32 is the first one where the offset in the
-    * "jmp OFFSET*4(%eax)" can't be encoded in a single byte.
-    */
-   const GLubyte * const template_func = gl_dispatch_functions_start 
-     + (DISPATCH_FUNCTION_SIZE * 32);
-   GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE);
-
-
-   if ( code != NULL ) {
-      (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE);
-      fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset );
-   }
-
-   return (_glapi_proc) code;
-#elif defined(USE_SPARC_ASM)
-
-#if defined(PTHREADS) || defined(GLX_USE_TLS)
-   static const unsigned int template[] = {
-      0x07000000, /* sethi %hi(0), %g3 */
-      0x8210000f, /* mov  %o7, %g1 */
-      0x40000000, /* call */
-      0x9e100001, /* mov  %g1, %o7 */
-   };
-#ifdef GLX_USE_TLS
-   extern unsigned int __glapi_sparc_tls_stub;
-   unsigned long call_dest = (unsigned long ) &__glapi_sparc_tls_stub;
-#else
-   extern unsigned int __glapi_sparc_pthread_stub;
-   unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub;
-#endif
-   unsigned int *code = (unsigned int *) malloc(sizeof(template));
-   if (code) {
-      code[0] = template[0] | (functionOffset & 0x3fffff);
-      code[1] = template[1];
-      __glapi_sparc_icache_flush(&code[0]);
-      code[2] = template[2] |
-         (((call_dest - ((unsigned long) &code[2]))
-	   >> 2) & 0x3fffffff);
-      code[3] = template[3];
-      __glapi_sparc_icache_flush(&code[2]);
-   }
-   return (_glapi_proc) code;
-#endif
-
-#else
-   (void) functionOffset;
-   return NULL;
-#endif /* USE_*_ASM */
-}
-
-
-/**
- * This function inserts a new dispatch offset into the assembly language
- * stub that was generated with the preceeding function.
- */
-static void
-fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset)
-{
-#if defined(USE_X86_ASM)
-   GLubyte * const code = (GLubyte *) entrypoint;
-
-#if DISPATCH_FUNCTION_SIZE == 32
-   *((unsigned int *)(code + 11)) = 4 * offset;
-   *((unsigned int *)(code + 22)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS )
-   *((unsigned int *)(code +  8)) = 4 * offset;
-#elif DISPATCH_FUNCTION_SIZE == 16
-   *((unsigned int *)(code +  7)) = 4 * offset;
-#else
-# error Invalid DISPATCH_FUNCTION_SIZE!
-#endif
-
-#elif defined(USE_SPARC_ASM)
-   unsigned int *code = (unsigned int *) entrypoint;
-   code[0] &= ~0x3fffff;
-   code[0] |= (offset * sizeof(void *)) & 0x3fffff;
-   __glapi_sparc_icache_flush(&code[0]);
-#else
-
-   /* an unimplemented architecture */
-   (void) entrypoint;
-   (void) offset;
-
-#endif /* USE_*_ASM */
-}
-
 
 /**
  * strdup() is actually not a standard ANSI C or POSIX routine.
diff --git a/src/mesa/glapi/glapi_priv.h b/src/mesa/glapi/glapi_priv.h
index 05eda99ccf..7cd81ee8dc 100644
--- a/src/mesa/glapi/glapi_priv.h
+++ b/src/mesa/glapi/glapi_priv.h
@@ -40,4 +40,27 @@ extern void
 init_glapi_relocs_once(void);
 
 
+extern _glapi_proc
+generate_entrypoint(GLuint functionOffset);
+
+
+extern void
+fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset);
+
+
+extern _glapi_proc
+get_entrypoint_address(GLuint functionOffset);
+
+
+#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS)
+# define DISPATCH_FUNCTION_SIZE  16
+#elif defined(USE_X86_ASM)
+# if defined(THREADS) && !defined(GLX_USE_TLS)
+#  define DISPATCH_FUNCTION_SIZE  32
+# else
+#  define DISPATCH_FUNCTION_SIZE  16
+# endif
+#endif
+
+
 #endif
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 9f2e4e5157..74885548e5 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -88,6 +88,7 @@ MAIN_SOURCES = \
 GLAPI_SOURCES = \
 	glapi/glapi.c \
 	glapi/glapi_dispatch.c \
+	glapi/glapi_entrypoint.c \
 	glapi/glapi_getproc.c \
 	glapi/glapi_nop.c \
 	glapi/glthread.c
-- 
cgit v1.2.3


From 20ed2445b3b98f2b49a26ad4b45859a908583311 Mon Sep 17 00:00:00 2001
From: George Sapountzis <gsapountzis@gmail.com>
Date: Tue, 9 Mar 2010 22:03:24 +0200
Subject: glapi: allow for any mangle prefix

---
 src/mesa/glapi/glapi_getproc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
index bd930b875d..46b466920b 100644
--- a/src/mesa/glapi/glapi_getproc.c
+++ b/src/mesa/glapi/glapi_getproc.c
@@ -63,7 +63,7 @@ find_entry( const char * n )
    for (i = 0; static_functions[i].Name_offset >= 0; i++) {
       const char *testName = gl_string_table + static_functions[i].Name_offset;
 #ifdef MANGLE
-      /* skip the "m" prefix on the name */
+      /* skip the prefix on the name */
       if (strcmp(testName, n + 1) == 0)
 #else
       if (strcmp(testName, n) == 0)
@@ -424,7 +424,8 @@ _glapi_get_proc_address(const char *funcName)
    GLuint i;
 
 #ifdef MANGLE
-   if (funcName[0] != 'm' || funcName[1] != 'g' || funcName[2] != 'l')
+   /* skip the prefix on the name */
+   if (funcName[1] != 'g' || funcName[2] != 'l')
       return NULL;
 #else
    if (funcName[0] != 'g' || funcName[1] != 'l')
-- 
cgit v1.2.3


From 3355ae1925dc3c868ec196d09c3a6121ed77849a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 9 Mar 2010 20:43:13 +0000
Subject: glapi: Fix scons build -- list new file.

---
 src/mesa/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 0a25dccde5..e80ec5ee88 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -249,6 +249,7 @@ if env['platform'] != 'winddk':
 	glapi_sources = [
 		'glapi/glapi.c',
 		'glapi/glapi_dispatch.c',
+		'glapi/glapi_entrypoint.c',
 		'glapi/glapi_getproc.c',
 		'glapi/glapi_nop.c',
 		'glapi/glthread.c',
-- 
cgit v1.2.3


From ac2b7835d5e61629f0a1f8b6c35eb1efa2ffbfa2 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Tue, 9 Mar 2010 21:41:00 +0100
Subject: radeon: add hw accelerated glReadPixel support (not enabled yet)

---
 src/mesa/drivers/dri/r200/Makefile              |   1 +
 src/mesa/drivers/dri/r200/radeon_pixel_read.c   |   1 +
 src/mesa/drivers/dri/r300/Makefile              |   3 +-
 src/mesa/drivers/dri/r300/radeon_pixel_read.c   |   1 +
 src/mesa/drivers/dri/r600/Makefile              |   3 +-
 src/mesa/drivers/dri/r600/radeon_pixel_read.c   |   1 +
 src/mesa/drivers/dri/radeon/Makefile            |   1 +
 src/mesa/drivers/dri/radeon/radeon_common.h     |   6 +
 src/mesa/drivers/dri/radeon/radeon_pixel_read.c | 188 ++++++++++++++++++++++++
 9 files changed, 203 insertions(+), 2 deletions(-)
 create mode 120000 src/mesa/drivers/dri/r200/radeon_pixel_read.c
 create mode 120000 src/mesa/drivers/dri/r300/radeon_pixel_read.c
 create mode 120000 src/mesa/drivers/dri/r600/radeon_pixel_read.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_pixel_read.c

diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 3f87100570..9ea81fd505 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -21,6 +21,7 @@ RADEON_COMMON_SOURCES = \
 	radeon_fbo.c \
 	radeon_lock.c \
 	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
 	radeon_queryobj.c \
 	radeon_span.c \
 	radeon_texture.c \
diff --git a/src/mesa/drivers/dri/r200/radeon_pixel_read.c b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 4257a32b89..2245998c95 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -31,8 +31,9 @@ RADEON_COMMON_SOURCES = \
 	radeon_fbo.c \
 	radeon_lock.c \
 	radeon_mipmap_tree.c \
-	radeon_span.c \
+	radeon_pixel_read.c \
 	radeon_queryobj.c \
+	radeon_span.c \
 	radeon_texture.c \
 	radeon_tex_copy.c \
 	radeon_tex_getimage.c \
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index f76859d11e..17915621ee 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -31,9 +31,10 @@ RADEON_COMMON_SOURCES = \
 	radeon_fbo.c \
 	radeon_lock.c \
 	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
+	radeon_queryobj.c \
 	radeon_span.c \
 	radeon_texture.c \
-	radeon_queryobj.c \
 	radeon_tex_copy.c \
 	radeon_tex_getimage.c \
 	radeon_tile.c
diff --git a/src/mesa/drivers/dri/r600/radeon_pixel_read.c b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 6904ebbee3..19df62742e 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -22,6 +22,7 @@ RADEON_COMMON_SOURCES = \
 	radeon_fbo.c \
 	radeon_lock.c \
 	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
 	radeon_queryobj.c \
 	radeon_span.c \
 	radeon_texture.c \
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
index cd01c9984e..35b3f08fff 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -44,6 +44,12 @@ radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
 struct radeon_renderbuffer *
 radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv);
 
+void
+radeonReadPixels(GLcontext * ctx,
+				GLint x, GLint y, GLsizei width, GLsizei height,
+				GLenum format, GLenum type,
+				const struct gl_pixelstore_attrib *pack, GLvoid * pixels);
+
 void radeon_check_front_buffer_rendering(GLcontext *ctx);
 static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
new file mode 100644
index 0000000000..27841938e6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "stdint.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common_context.h"
+#include "radeon_debug.h"
+#include "radeon_mipmap_tree.h"
+
+/* Map a ReadPixels format/type pair to the gl_format with the same bit layout,
+ * or MESA_FORMAT_NONE (sw fallback). GL_UNSIGNED_BYTE assumes an LE host - TODO confirm. */
+static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type)
+{
+    switch (format) {
+        case GL_RGB:
+            if (type == GL_UNSIGNED_SHORT_5_6_5)
+                return MESA_FORMAT_RGB565;
+            break;
+        case GL_RGBA:
+            switch (type) {
+                case GL_FLOAT:
+                    return MESA_FORMAT_RGBA_FLOAT32;
+                case GL_UNSIGNED_SHORT_5_5_5_1:
+                    return MESA_FORMAT_RGBA5551;
+                case GL_UNSIGNED_INT_8_8_8_8:
+                    return MESA_FORMAT_RGBA8888;
+                case GL_UNSIGNED_BYTE:
+                case GL_UNSIGNED_INT_8_8_8_8_REV:
+                    return MESA_FORMAT_RGBA8888_REV;
+            }
+            break;
+        case GL_BGRA:
+            switch (type) {
+                case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+                    return MESA_FORMAT_ARGB4444;
+                case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+                    return MESA_FORMAT_ARGB1555;
+                case GL_UNSIGNED_BYTE:
+                case GL_UNSIGNED_INT_8_8_8_8_REV:
+                    return MESA_FORMAT_ARGB8888;
+            }
+            break;
+    }
+    return MESA_FORMAT_NONE;
+}
+
+/**
+ * Try to satisfy a ReadPixels request with a hardware blit into a temporary
+ * GTT buffer object, then copy the rows from that buffer out to \p pixels.
+ * \return GL_TRUE if handled (or nothing to read), GL_FALSE for sw fallback.
+ */
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    const struct radeon_renderbuffer *rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+    const gl_format dst_format = gl_format_and_type_to_mesa_format(format, type);
+    unsigned dst_rowstride, dst_imagesize, aligned_rowstride, flip_y;
+    struct radeon_bo *dst_buffer;
+    GLint dst_x = 0, dst_y = 0;
+    GLboolean copied = GL_FALSE;
+
+    /* It's not worth if number of pixels to copy is really small */
+    if (width * height < 100) {
+        return GL_FALSE;
+    }
+
+    if (dst_format == MESA_FORMAT_NONE ||
+        !radeon->vtbl.check_blit(dst_format) || !radeon->vtbl.blit) {
+        return GL_FALSE;
+    }
+
+    if (ctx->_ImageTransferState || pack->SwapBytes || pack->LsbFirst) {
+        return GL_FALSE;
+    }
+
+    dst_rowstride = (pack->RowLength > 0) ? pack->RowLength : width;
+
+    if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) {
+        return GL_TRUE;
+    }
+    assert(x >= 0 && y >= 0);
+
+    aligned_rowstride = get_texture_image_row_stride(radeon, dst_format, dst_rowstride, 0);
+    dst_imagesize = get_texture_image_size(dst_format,
+                                           aligned_rowstride,
+                                           height, 1, 0);
+    dst_buffer = radeon_bo_open(radeon->radeonScreen->bom, 0, dst_imagesize, 1024, RADEON_GEM_DOMAIN_GTT, 0);
+    if (!dst_buffer) {
+        return GL_FALSE; /* allocation failed: let the caller fall back */
+    }
+
+    /* Disable source Y flipping for FBOs */
+    flip_y = (ctx->ReadBuffer->Name == 0);
+    if (pack->Invert) {
+        y = rrb->base.Height - height - y;
+        flip_y = !flip_y;
+    }
+
+    if (radeon->vtbl.blit(ctx,
+                          rrb->bo,
+                          rrb->draw_offset,
+                          rrb->base.Format,
+                          rrb->pitch / rrb->cpp,
+                          rrb->base.Width,
+                          rrb->base.Height,
+                          x,
+                          y,
+                          dst_buffer,
+                          0, /* dst_offset */
+                          dst_format,
+                          aligned_rowstride / _mesa_get_format_bytes(dst_format),
+                          width,
+                          height,
+                          0, /* dst_x */
+                          0, /* dst_y */
+                          width,
+                          height,
+                          flip_y) &&
+        radeon_bo_map(dst_buffer, 0) == 0) /* never read ->ptr of an unmapped bo */
+    {
+        dst_rowstride *= _mesa_get_format_bytes(dst_format);
+        copy_rows(pixels, dst_rowstride, dst_buffer->ptr,
+                  aligned_rowstride, height, dst_rowstride);
+        radeon_bo_unmap(dst_buffer);
+        copied = GL_TRUE;
+    }
+
+    radeon_bo_unref(dst_buffer);
+    return copied;
+}
+
+/**
+ * ReadPixels entry point: use the blit path when it accepts the request,
+ * otherwise log the fallback and hand the call to swrast.
+ */
+void
+radeonReadPixels(GLcontext * ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    const GLboolean handled =
+        do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
+    if (!handled) {
+        radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                     "Falling back to sw for ReadPixels (format %s, type %s)\n",
+                     _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+
+        /* The spans code needs current Mesa state before swrast sets it up. */
+        if (ctx->NewState) {
+            _mesa_update_state(ctx);
+        }
+        _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+    }
+}
-- 
cgit v1.2.3


From 67108b5d12e0526ebedcdf2dbeeadfdbd0782161 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Tue, 9 Mar 2010 21:42:00 +0100
Subject: r300: enable HW accelerated gl(Read/Copy/Draw)Pixels

---
 src/mesa/drivers/dri/r300/r300_state.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 5979dedac4..8739dcbafb 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/simple_list.h"
 #include "main/api_arrayelt.h"
 
+#include "drivers/common/meta.h"
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
 #include "shader/prog_parameter.h"
@@ -2326,8 +2327,12 @@ void r300InitStateFuncs(struct dd_function_table *functions)
 	functions->ClipPlane = r300ClipPlane;
 	functions->Scissor = radeonScissor;
 
-	functions->DrawBuffer		= radeonDrawBuffer;
-	functions->ReadBuffer		= radeonReadBuffer;
+	functions->DrawBuffer = radeonDrawBuffer;
+	functions->ReadBuffer = radeonReadBuffer;
+
+	functions->CopyPixels = _mesa_meta_CopyPixels;
+	functions->DrawPixels = _mesa_meta_DrawPixels;
+	functions->ReadPixels = radeonReadPixels;
 }
 
 void r300InitShaderFunctions(r300ContextPtr r300)
-- 
cgit v1.2.3


From bd2239e4976157d1df4265733b5b4324e5ad7a50 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Tue, 9 Mar 2010 21:42:41 +0100
Subject: progs/tests: use first color attachment

Allows the tests to be run on drivers that support only one color attachment.
---
 progs/tests/fbotest1.c | 6 +++---
 progs/tests/fbotest2.c | 6 +++---
 progs/tests/fbotest3.c | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/progs/tests/fbotest1.c b/progs/tests/fbotest1.c
index 0cd7f95c35..a95fdff74c 100644
--- a/progs/tests/fbotest1.c
+++ b/progs/tests/fbotest1.c
@@ -36,8 +36,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -161,7 +161,7 @@ Init( void )
    assert(i == MyFB);
 
    CheckError(__LINE__);
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, MyRB);
 
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
diff --git a/progs/tests/fbotest2.c b/progs/tests/fbotest2.c
index f9c506193f..872b46279e 100644
--- a/progs/tests/fbotest2.c
+++ b/progs/tests/fbotest2.c
@@ -40,8 +40,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -163,7 +163,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
diff --git a/progs/tests/fbotest3.c b/progs/tests/fbotest3.c
index 8e288b38b8..c176f82d2b 100644
--- a/progs/tests/fbotest3.c
+++ b/progs/tests/fbotest3.c
@@ -50,8 +50,8 @@ Display( void )
 
    /* draw to user framebuffer */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
-   glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
-   glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
+   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
 
    status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
@@ -189,7 +189,7 @@ Init( void )
    glGenRenderbuffersEXT(1, &ColorRb);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, ColorRb);
    assert(glIsRenderbufferEXT(ColorRb));
-   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT,
+   glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT, ColorRb);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
 
-- 
cgit v1.2.3


From a17563c7ddfa58fe7f09d22a62a10f3488ef3147 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 7 Mar 2010 14:34:21 +0100
Subject: radeon/r200/r300/r600: add is_format_renderable function

---
 src/mesa/drivers/dri/r200/r200_context.c           |  1 +
 src/mesa/drivers/dri/r300/r300_context.c           |  6 ++++
 src/mesa/drivers/dri/r300/r300_tex.c               | 39 ++++++++++++++++++++++
 src/mesa/drivers/dri/r300/r300_tex.h               |  3 ++
 src/mesa/drivers/dri/r600/r600_context.c           |  1 +
 .../drivers/dri/radeon/radeon_common_context.h     |  1 +
 src/mesa/drivers/dri/radeon/radeon_context.c       |  1 +
 src/mesa/drivers/dri/radeon/radeon_texture.c       | 16 +++++++++
 src/mesa/drivers/dri/radeon/radeon_texture.h       |  2 ++
 9 files changed, 70 insertions(+)

diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index dad2580e08..4f1a56658c 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -266,6 +266,7 @@ static void r200_init_vtbl(radeonContextPtr radeon)
    radeon->vtbl.emit_query_finish = r200_emit_query_finish;
    radeon->vtbl.check_blit = r200_check_blit;
    radeon->vtbl.blit = r200_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index ff35cd5275..364e0ba6b6 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -321,6 +321,12 @@ static void r300_init_vtbl(radeonContextPtr radeon)
 
 	radeon->vtbl.check_blit = r300_check_blit;
 	radeon->vtbl.blit = r300_blit;
+
+	if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+		radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+	} else {
+		radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+	}
 }
 
 static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 8dd8507395..baef206bc2 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -308,6 +308,45 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
 	return &t->base;
 }
 
+unsigned r300IsFormatRenderable(gl_format mesa_format) /* returns 1 for the formats listed below, 0 for anything else */
+{
+	switch (mesa_format)
+	{
+		case MESA_FORMAT_RGB565:
+		case MESA_FORMAT_RGBA5551:
+		case MESA_FORMAT_RGBA8888:
+		case MESA_FORMAT_RGB565_REV:
+		case MESA_FORMAT_RGBA8888_REV:
+		case MESA_FORMAT_ARGB4444:
+		case MESA_FORMAT_ARGB1555:
+		case MESA_FORMAT_XRGB8888:
+		case MESA_FORMAT_ARGB8888:
+		case MESA_FORMAT_ARGB4444_REV:
+		case MESA_FORMAT_ARGB1555_REV:
+		case MESA_FORMAT_XRGB8888_REV:
+		case MESA_FORMAT_ARGB8888_REV:
+		case MESA_FORMAT_SRGBA8:
+		case MESA_FORMAT_SARGB8:
+		case MESA_FORMAT_SL8:
+		case MESA_FORMAT_A8:
+		case MESA_FORMAT_L8:
+		case MESA_FORMAT_I8:
+		case MESA_FORMAT_Z16: /* the only depth format in this list */
+			return 1;
+		default:
+			return 0;
+	}
+}
+
+unsigned r500IsFormatRenderable(gl_format mesa_format) /* r300 list plus MESA_FORMAT_S8_Z24 */
+{
+	if (mesa_format == MESA_FORMAT_S8_Z24) {
+		return 1;
+	} else {
+		return r300IsFormatRenderable(mesa_format); /* everything else gets the r300 answer */
+	}
+}
+
 void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
 {
 	/* Note: we only plug in the functions we implement in the driver
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index 9694e703b8..aca44cd766 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -53,4 +53,7 @@ extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_tab
 
 int32_t r300TranslateTexFormat(gl_format mesaFormat);
 
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
 #endif				/* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 134e97e7c3..76d5027649 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -239,6 +239,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
 	radeon->vtbl.emit_query_finish = r600_emit_query_finish;
 	radeon->vtbl.check_blit = r600_check_blit;
 	radeon->vtbl.blit = r600_blit;
+	radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index d1a24e265f..5156c5d0d0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -539,6 +539,7 @@ struct radeon_context {
                         unsigned reg_width,
                         unsigned reg_height,
                         unsigned flip_y);
+	   unsigned (*is_format_renderable)(gl_format mesa_format);
    } vtbl;
 };
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 878a453bd5..56aba16e9e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -200,6 +200,7 @@ static void r100_init_vtbl(radeonContextPtr radeon)
    radeon->vtbl.emit_query_finish = r100_emit_query_finish;
    radeon->vtbl.check_blit = r100_check_blit;
    radeon->vtbl.blit = r100_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
 }
 
 /* Create the device specific context.
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index 3ccc711253..2b655fbd95 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -1006,3 +1006,19 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
 	radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0,
 		format, type, pixels, packing, texObj, texImage, 0);
 }
+
+unsigned radeonIsFormatRenderable(gl_format mesa_format) /* returns 1 if either check below matches, else 0 */
+{
+	if (mesa_format == _dri_texformat_argb8888 || mesa_format == _dri_texformat_rgb565 ||
+		mesa_format == _dri_texformat_argb1555 || mesa_format == _dri_texformat_argb4444)
+		return 1; /* one of the four _dri_texformat_* color formats */
+
+	switch (mesa_format) /* depth and depth-stencil formats */
+	{
+		case MESA_FORMAT_Z16:
+		case MESA_FORMAT_S8_Z24:
+			return 1;
+		default:
+			return 0;
+	}
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index f09dd65214..4ce639ea34 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -135,4 +135,6 @@ void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
 			GLint x, GLint y,
 			GLsizei width, GLsizei height);
 
+unsigned radeonIsFormatRenderable(gl_format mesa_format);
+
 #endif
-- 
cgit v1.2.3


From fd05067c9912e7ee83058a48d6e4c2cd7f262665 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 7 Mar 2010 14:26:21 +0100
Subject: r300: add support for more rendering formats

---
 src/mesa/drivers/dri/r300/r300_cmdbuf.c | 35 ++++++++++---------
 src/mesa/drivers/dri/r300/r300_state.c  | 61 +++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 6cfa5686f4..e2dbb1dbf4 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -332,36 +332,37 @@ void r300_emit_cb_setup(struct r300_context *r300,
     assert(offset % 32 == 0);
 
     switch (format) {
-        case MESA_FORMAT_RGB565:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_RGB565;
+        case MESA_FORMAT_SL8:
+        case MESA_FORMAT_A8:
+        case MESA_FORMAT_L8:
+        case MESA_FORMAT_I8:
+            cbpitch |= R300_COLOR_FORMAT_I8;
             break;
+        case MESA_FORMAT_RGB565:
         case MESA_FORMAT_RGB565_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_RGB565;
             break;
         case MESA_FORMAT_ARGB4444:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_ARGB4444;
-            break;
         case MESA_FORMAT_ARGB4444_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_ARGB4444;
             break;
+        case MESA_FORMAT_RGBA5551:
         case MESA_FORMAT_ARGB1555:
-            assert(_mesa_little_endian());
-            cbpitch |= R300_COLOR_FORMAT_ARGB1555;
-            break;
         case MESA_FORMAT_ARGB1555_REV:
-            assert(!_mesa_little_endian());
             cbpitch |= R300_COLOR_FORMAT_ARGB1555;
             break;
+        case MESA_FORMAT_RGBA8888:
+        case MESA_FORMAT_RGBA8888_REV:
+        case MESA_FORMAT_XRGB8888:
+        case MESA_FORMAT_ARGB8888:
+        case MESA_FORMAT_XRGB8888_REV:
+        case MESA_FORMAT_ARGB8888_REV:
+        case MESA_FORMAT_SRGBA8:
+        case MESA_FORMAT_SARGB8:
+            cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+            break;
         default:
-            if (cpp == 4) {
-                cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-            } else {
-                _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");;
-            }
+            _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
             break;
     }
 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 8739dcbafb..bdd12c6d22 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2238,6 +2238,63 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
 	}
 }
 
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+	(R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+	 R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t hw_format;
+
+	switch (radeon_get_colorbuffer(&rmesa->radeon)->base.Format)
+	{
+		case MESA_FORMAT_RGBA5551:
+		case MESA_FORMAT_RGBA8888:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+			break;
+		case MESA_FORMAT_RGB565_REV:
+		case MESA_FORMAT_RGBA8888_REV:
+			hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+			break;
+		case MESA_FORMAT_RGB565:
+		case MESA_FORMAT_ARGB4444:
+		case MESA_FORMAT_ARGB1555:
+		case MESA_FORMAT_XRGB8888:
+		case MESA_FORMAT_ARGB8888:
+			hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+			break;
+		case MESA_FORMAT_ARGB4444_REV:
+		case MESA_FORMAT_ARGB1555_REV:
+		case MESA_FORMAT_XRGB8888_REV:
+		case MESA_FORMAT_ARGB8888_REV:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+			break;
+		case MESA_FORMAT_SRGBA8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+			break;
+		case MESA_FORMAT_SARGB8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+			break;
+		case MESA_FORMAT_SL8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+			break;
+		case MESA_FORMAT_A8:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+			break;
+		case MESA_FORMAT_L8:
+		case MESA_FORMAT_I8:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+			break;
+		default:
+			assert(!"Unsupported format");
+			return; /* with NDEBUG the assert is compiled out: bail out instead of emitting an uninitialized hw_format */
+	}
+
+	R300_STATECHANGE(rmesa, us_out_fmt); /* reached only when one of the cases above assigned hw_format */
+	rmesa->hw.us_out_fmt.cmd[1] = hw_format; /* component count and channel selects chosen for the colorbuffer's format */
+}
+#undef EASY_US_OUT_FMT
+
 /**
  * Called by Mesa after an internal state update.
  */
@@ -2267,6 +2324,10 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
 			r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
 	}
 
+	if (new_state & _NEW_BUFFERS) {
+		r300SetupUsOutputFormat(ctx);
+	}
+
 	r300->radeon.NewGLState |= new_state;
 }
 
-- 
cgit v1.2.3


From b70dcabafce0c1a8a9fbbd84ff196e46b0782ca7 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 7 Mar 2010 22:01:56 +0100
Subject: radeon: mark framebuffer as incomplete if renderbuffer format isn't
 supported by hw

---
 src/mesa/drivers/dri/radeon/radeon_fbo.c         | 126 +++++++++++------------
 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c |   3 -
 2 files changed, 62 insertions(+), 67 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index 46664a1755..6398605835 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -409,82 +409,51 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx,
    radeon_draw_buffer(ctx, fb);
 }
 
-
-/* TODO: According to EXT_fbo spec internal format of texture image
- * once set during glTexImage call, should be preserved when
- * attaching image to renderbuffer. When HW doesn't support
- * rendering to format of attached image, set framebuffer
- * completeness accordingly in radeon_validate_framebuffer (issue #79).
- */
 static GLboolean
 radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, 
 		     struct gl_texture_image *texImage)
 {
-	int retry = 0;
-	gl_format texFormat;
-
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
-		"%s(%p, rrb %p, texImage %p) \n",
-		__func__, ctx, rrb, texImage);
-
-restart:
-	if (texImage->TexFormat == _dri_texformat_argb8888) {
-		rrb->base.DataType = GL_UNSIGNED_BYTE;
-		DBG("Render to RGBA8 texture OK\n");
+		"%s(%p, rrb %p, texImage %p, texFormat %s) \n",
+		__func__, ctx, rrb, texImage, _mesa_get_format_name(texImage->TexFormat));
+
+	switch (texImage->TexFormat) {
+		case MESA_FORMAT_RGBA8888:
+		case MESA_FORMAT_RGBA8888_REV:
+		case MESA_FORMAT_ARGB8888:
+		case MESA_FORMAT_ARGB8888_REV:
+		case MESA_FORMAT_XRGB8888:
+		case MESA_FORMAT_XRGB8888_REV:
+		case MESA_FORMAT_RGB565:
+		case MESA_FORMAT_RGB565_REV:
+		case MESA_FORMAT_RGBA5551:
+		case MESA_FORMAT_ARGB1555:
+		case MESA_FORMAT_ARGB1555_REV:
+		case MESA_FORMAT_ARGB4444:
+		case MESA_FORMAT_ARGB4444_REV:
+			rrb->base.DataType = GL_UNSIGNED_BYTE;
+			break;
+		case MESA_FORMAT_Z16:
+			rrb->base.DataType = GL_UNSIGNED_SHORT;
+			break;
+		case MESA_FORMAT_X8_Z24:
+			rrb->base.DataType = GL_UNSIGNED_INT;
+			break;
+		case MESA_FORMAT_S8_Z24:
+			rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+			break;
 	}
-	else if (texImage->TexFormat == _dri_texformat_rgb565) {
-		rrb->base.DataType = GL_UNSIGNED_BYTE;
-		DBG("Render to RGB5 texture OK\n");
-	}
-	else if (texImage->TexFormat == _dri_texformat_argb1555) {
-		rrb->base.DataType = GL_UNSIGNED_BYTE;
-		DBG("Render to ARGB1555 texture OK\n");
-	}
-	else if (texImage->TexFormat == _dri_texformat_argb4444) {
-		rrb->base.DataType = GL_UNSIGNED_BYTE;
-		DBG("Render to ARGB4444 texture OK\n");
-	}
-	else if (texImage->TexFormat == MESA_FORMAT_Z16) {
-		rrb->base.DataType = GL_UNSIGNED_SHORT;
-		DBG("Render to DEPTH16 texture OK\n");
-	}
-	else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
-		rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-		DBG("Render to DEPTH_STENCIL texture OK\n");
-	}
-	else {
-		/* try redoing the FBO */
-		if (retry == 1) {
-			DBG("Render to texture BAD FORMAT %d\n",
-			    texImage->TexFormat);
-			return GL_FALSE;
-		}
-                /* XXX why is the tex format being set here?
-                 * I think this can be removed.
-                 */
-		texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
-								_mesa_get_format_datatype(texImage->TexFormat),
-								1);
-
-		retry++;
-		goto restart;
-	}
-	
-	texFormat = texImage->TexFormat;
-
-	rrb->base.Format = texFormat;
-
-        rrb->cpp = _mesa_get_format_bytes(texFormat);
+		
+	rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat);
 	rrb->pitch = texImage->Width * rrb->cpp;
+	rrb->base.Format = texImage->TexFormat;
 	rrb->base.InternalFormat = texImage->InternalFormat;
-        rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
-
+	rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
 	rrb->base.Width = texImage->Width;
 	rrb->base.Height = texImage->Height;
-	
 	rrb->base.Delete = radeon_delete_renderbuffer;
 	rrb->base.AllocStorage = radeon_nop_alloc_storage;
-	
+
 	return GL_TRUE;
 }
 
@@ -607,6 +576,35 @@ radeon_finish_render_texture(GLcontext * ctx,
 static void
 radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
 {
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	gl_format mesa_format;
+	int i;
+
+	for (i = -2; i < (GLint) ctx->Const.MaxColorAttachments; i++) { /* i == -2: depth, i == -1: stencil, i >= 0: color attachment i */
+		struct gl_renderbuffer_attachment *att;
+		if (i == -2) {
+			att = &fb->Attachment[BUFFER_DEPTH];
+		} else if (i == -1) {
+			att = &fb->Attachment[BUFFER_STENCIL];
+		} else {
+			att = &fb->Attachment[BUFFER_COLOR0 + i];
+		}
+
+		if (att->Type == GL_TEXTURE) {
+			mesa_format = att->Texture->Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+		} else {
+			/* All renderbuffer formats are renderable, but not samplable */
+			continue;
+		}
+
+		if (!radeon->vtbl.is_format_renderable(mesa_format)){ /* per-chip check installed in the driver's vtbl */
+			fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; /* the first unsupported texture attachment decides the result */
+			radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+						"%s: HW doesn't support format %s as output format of attachment %d\n",
+						__FUNCTION__, _mesa_get_format_name(mesa_format), i);
+			return;
+		}
+	}
 }
 
 void radeon_fbo_init(struct radeon_context *radeon)
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c6cc417dd6..78c5f5dd57 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -468,12 +468,9 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
 
 		radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
 
-		/* TODO: bring back these assertions once the FBOs are fixed */
-#if 0
 		assert(image->mtlevel == level);
 		assert(srclvl->size == dstlvl->size);
 		assert(srclvl->rowstride == dstlvl->rowstride);
-#endif
 
 		radeon_bo_map(image->mt->bo, GL_FALSE);
 
-- 
cgit v1.2.3


From eeaf1e0519912b2ca3a8bce56c35548f9e0b73ca Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Tue, 9 Mar 2010 23:13:19 +0100
Subject: r300: don't crash if there's no colorbuffer

---
 src/mesa/drivers/dri/r300/r300_state.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index bdd12c6d22..8748941241 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2245,8 +2245,13 @@ static void r300SetupUsOutputFormat(GLcontext *ctx)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	uint32_t hw_format;
+	struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
 
-	switch (radeon_get_colorbuffer(&rmesa->radeon)->base.Format)
+	if (!rrb) {
+		return;
+	}
+	
+	switch (rrb->base.Format)
 	{
 		case MESA_FORMAT_RGBA5551:
 		case MESA_FORMAT_RGBA8888:
-- 
cgit v1.2.3


From 7f6d2754d586545ab6c970acffdd897294879039 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 8 Mar 2010 16:08:33 -0800
Subject: i965: Fix up VP constbuf leak on program delete.

---
 src/mesa/drivers/dri/i965/brw_program.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index c78f7b38ae..1fd957b3ad 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
 			      struct gl_program *prog )
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      dri_bo_unreference(brw_fprog->const_buffer);
+      struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+      dri_bo_unreference(brw_fp->const_buffer);
+   }
+
+   if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+      dri_bo_unreference(brw_vp->const_buffer);
    }
 
    _mesa_delete_program( ctx, prog );
-- 
cgit v1.2.3


From f6f547d87ea68f44c50a0b0231b7360ca94b2975 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Mar 2010 09:56:42 -0800
Subject: i965: Fix nested loops in the VS.

We were patching up all the break and continues between the start of
our loop and the end of our loop, even if they were breaks/continues
for an inner loop.  Avoiding patching already patched breaks/continues
fixes piglit glsl-vs-loop-nested.
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a7c4b58972..a48804a660 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
             /* patch all the BREAK/CONT instructions from last BEGINLOOP */
             while (inst0 > loop_inst[loop_depth]) {
                inst0--;
-               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                   inst0->bits3.if_else.pop_count = 0;
                }
-               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                   inst0->bits3.if_else.pop_count = 0;
                }
-- 
cgit v1.2.3


From 280abdacf900d591ef909cf697f0c5679389c3f6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Mar 2010 11:31:28 -0800
Subject: i965: Print the offsets for WHILE and BREAK in disasm.

---
 src/mesa/drivers/dri/i965/brw_disasm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index a8f6b993ac..54699cf8d3 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -74,9 +74,9 @@ struct {
     [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
-    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
-- 
cgit v1.2.3


From 6b194dab6b4d9f12cdd54c699b23c0d3420a49c2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Mar 2010 11:56:14 -0800
Subject: i965: Unalias all GLSL source regs from the destination regs used.

We were doing it ad-hoc before, as instructions with potential
aliasing problems were identified.  But thanks to swizzling basically
anything can have aliasing, so just do it generally at source reg
setup time.  This is somewhat inefficient, because sometimes an
operation doesn't need unaliasing protection if the swizzling is safe,
but the unaliasing before didn't cover those cases either.

Fixes piglit glsl-fs-loop.
---
 src/mesa/drivers/dri/i965/brw_wm_glsl.c | 138 ++++++--------------------------
 1 file changed, 25 insertions(+), 113 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 562608e2ec..13306accda 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
     }
 }
 
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1,
-		      const struct brw_reg *arg2),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1,
-	 const struct brw_reg *arg2)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-	tmp_arg2[j] = arg2[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-		if (arg2[j].file == dst[i].file &&
-		    dst[i].nr == arg2[j].nr) {
-		    tmp_arg2[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg2[j], arg2[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
-    release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1);
-
-    release_tmps(c, mark);
-}
-
 static void emit_arl(struct brw_wm_compile *c,
                      const struct prog_instruction *inst)
 {
@@ -1813,14 +1707,29 @@ static void
 get_argument_regs(struct brw_wm_compile *c,
 		  const struct prog_instruction *inst,
 		  int index,
+		  struct brw_reg *dst,
 		  struct brw_reg *regs,
 		  int mask)
 {
-    int i;
+    struct brw_compile *p = &c->func;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
-	if (mask & (1 << i))
+	if (mask & (1 << i)) {
 	    regs[i] = get_src_reg(c, inst, index, i);
+
+	    /* Unalias destination registers from our sources. */
+	    if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+	       for (j = 0; j < 4; j++) {
+		   if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+		       struct brw_reg tmp = alloc_tmp(c);
+		       brw_MOV(p, tmp, regs[i]);
+		       regs[i] = tmp;
+		       break;
+		   }
+	       }
+	    }
+	}
     }
 }
 
@@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	int dst_flags;
 	struct brw_reg args[3][4], dst[4];
 	int j;
+	int mark = mark_tmps( c );
 
         c->cur_inst = i;
 
@@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	   }
 	}
 	for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
-	    get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+	    get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
 
 	dst_flags = inst->DstReg.WriteMask;
 	if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 		break;
 	    case OPCODE_LRP:
-		unalias3(c, emit_lrp,
-			 dst, dst_flags, args[0], args[1], args[2]);
+		emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
 		break;
 	    case OPCODE_TRUNC:
 		emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1961,10 +1870,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
 		break;
 	    case OPCODE_MIN:	
-		unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+		emit_min(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_MAX:	
-		unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+		emit_max(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_DDX:
 	    case OPCODE_DDY:
@@ -2119,6 +2028,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 			inst->Opcode);
 	}
 
+	/* Release temporaries containing any unaliased source regs. */
+	release_tmps( c, mark );
+
 	if (inst->CondUpdate)
 	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 	else
-- 
cgit v1.2.3


From a81836ee2fe5092d695b717addf8cec91f569777 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Mar 2010 14:22:51 -0800
Subject: i965: Fix ENDLOOP to only patch up this loop's BREAK and CONT.

Corresponds to d225a25e21a24508aea3b877c78beb35502e942d and fixes
piglit glsl-fs-loop-nested.  Bug #25173.
---
 src/mesa/drivers/dri/i965/brw_wm_glsl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 13306accda..ea3c2405af 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -2012,11 +2012,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                   /* patch all the BREAK/CONT instructions from last BGNLOOP */
                   while (inst0 > loop_inst[loop_depth]) {
                      inst0--;
-                     if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+			 inst0->bits3.if_else.jump_count == 0) {
 			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
 			inst0->bits3.if_else.pop_count = 0;
                      }
-                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			      inst0->bits3.if_else.jump_count == 0) {
                         inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                         inst0->bits3.if_else.pop_count = 0;
                      }
-- 
cgit v1.2.3


From 007f25b3bde88de550d6ddba6a96769e4f3d5ce7 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Tue, 9 Mar 2010 20:03:35 -0800
Subject: r300g: Reorder regs, save one dword.

Or, put another way, save 10% on rendering calls.
---
 src/gallium/drivers/r300/r300_render.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2605628c02..9c001ae186 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -183,7 +183,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
         }
     }
 
-    dwords = 10 + count * vertex_size;
+    dwords = 9 + count * vertex_size;
 
     r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords);
     r300_emit_buffer_validate(r300, FALSE, NULL);
@@ -193,8 +193,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
     OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
             r300_translate_primitive(mode));
@@ -238,15 +239,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
 
     if (alt_num_verts) {
         assert(count < (1 << 24));
-        BEGIN_CS(10);
+        BEGIN_CS(9);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(8);
+        BEGIN_CS(7);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(count - 1);
+    OUT_CS(0);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
            r300_translate_primitive(mode) |
@@ -281,15 +283,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
 
     if (alt_num_verts) {
-        BEGIN_CS(16);
+        BEGIN_CS(15);
         OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
     } else {
-        BEGIN_CS(14);
+        BEGIN_CS(13);
     }
     OUT_CS_REG(R300_GA_COLOR_CONTROL,
             r300_provoking_vertex_fixes(r300, mode));
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+    OUT_CS(maxIndex);
+    OUT_CS(minIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
         count_dwords = count;
-- 
cgit v1.2.3


From ac2b35fd2d5008a39fa394b7b04fd29b899d3e55 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 11:45:08 +1000
Subject: nv50: use relocs rather than re-uploading TIC all the time

---
 src/gallium/drivers/nv50/nv50_context.h        |  1 +
 src/gallium/drivers/nv50/nv50_state_validate.c |  3 +--
 src/gallium/drivers/nv50/nv50_tex.c            | 30 ++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index e91a61b27d..2cb5538c54 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -249,6 +249,7 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
 			      unsigned offset, unsigned size);
 
 /* nv50_tex.c */
+extern void nv50_tex_relocs(struct nv50_context *);
 extern void nv50_tex_validate(struct nv50_context *);
 
 /* nv50_transfer.c */
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c974cc92dc..0d30957ee3 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -290,8 +290,7 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
 {
 	struct nv50_context *nv50 = chan->user_private;
 
-	if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
-		so_emit(chan, nv50->state.tic_upload);
+	nv50_tex_relocs(nv50);
 
 	so_emit_reloc_markers(chan, nv50->state.fb);
 	so_emit_reloc_markers(chan, nv50->state.vertprog);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index de0560e20c..ed3440fcc1 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -24,6 +24,7 @@
 #include "nv50_texture.h"
 
 #include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_reloc.h"
 
 #include "util/u_format.h"
 
@@ -194,6 +195,35 @@ nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	return TRUE;
 }
 
+void
+nv50_tex_relocs(struct nv50_context *nv50)
+{
+	struct nouveau_channel *chan = nv50->screen->tesla->channel;
+	int p, unit;
+
+	p = PIPE_SHADER_FRAGMENT;
+	for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+		if (!nv50->miptree[p][unit])
+			continue;
+		nouveau_reloc_emit(chan, nv50->screen->tic,
+				   ((p * 32) + unit) * 32, NULL,
+				   nv50->miptree[p][unit]->base.bo, 0, 0,
+				   NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+				   NOUVEAU_BO_RD, 0, 0);
+	}
+
+	p = PIPE_SHADER_VERTEX;
+	for (unit = 0; unit < nv50->miptree_nr[p]; unit++) {
+		if (!nv50->miptree[p][unit])
+			continue;
+		nouveau_reloc_emit(chan, nv50->screen->tic,
+				   ((p * 32) + unit) * 32, NULL,
+				   nv50->miptree[p][unit]->base.bo, 0, 0,
+				   NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+				   NOUVEAU_BO_RD, 0, 0);
+	}
+}
+
 void
 nv50_tex_validate(struct nv50_context *nv50)
 {
-- 
cgit v1.2.3


From 62ab89785b55e60b978dc2b32995676859299c80 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 14:26:25 +1000
Subject: nv50: rework state emission

---
 src/gallium/drivers/nv50/nv50_context.c        |  42 +--
 src/gallium/drivers/nv50/nv50_context.h        |  39 +--
 src/gallium/drivers/nv50/nv50_program.c        |  31 +-
 src/gallium/drivers/nv50/nv50_screen.c         |   4 +-
 src/gallium/drivers/nv50/nv50_state_validate.c | 430 ++++++++++++-------------
 src/gallium/drivers/nv50/nv50_tex.c            |   7 +-
 src/gallium/drivers/nv50/nv50_vbo.c            |   7 +-
 7 files changed, 249 insertions(+), 311 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 7be12fcdef..eff012c14d 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -46,43 +46,13 @@ static void
 nv50_destroy(struct pipe_context *pipe)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
+	int i;
 
-        if (nv50->state.fb)
-		so_ref(NULL, &nv50->state.fb);
-	if (nv50->state.blend)
-		so_ref(NULL, &nv50->state.blend);
-	if (nv50->state.blend_colour)
-		so_ref(NULL, &nv50->state.blend_colour);
-	if (nv50->state.zsa)
-		so_ref(NULL, &nv50->state.zsa);
-	if (nv50->state.rast)
-		so_ref(NULL, &nv50->state.rast);
-	if (nv50->state.stipple)
-		so_ref(NULL, &nv50->state.stipple);
-	if (nv50->state.scissor)
-		so_ref(NULL, &nv50->state.scissor);
-	if (nv50->state.viewport)
-		so_ref(NULL, &nv50->state.viewport);
-	if (nv50->state.tsc_upload)
-		so_ref(NULL, &nv50->state.tsc_upload);
-	if (nv50->state.tic_upload)
-		so_ref(NULL, &nv50->state.tic_upload);
-	if (nv50->state.vertprog)
-		so_ref(NULL, &nv50->state.vertprog);
-	if (nv50->state.fragprog)
-		so_ref(NULL, &nv50->state.fragprog);
-	if (nv50->state.geomprog)
-		so_ref(NULL, &nv50->state.geomprog);
-	if (nv50->state.fp_linkage)
-		so_ref(NULL, &nv50->state.fp_linkage);
-	if (nv50->state.gp_linkage)
-		so_ref(NULL, &nv50->state.gp_linkage);
-	if (nv50->state.vtxfmt)
-		so_ref(NULL, &nv50->state.vtxfmt);
-	if (nv50->state.vtxbuf)
-		so_ref(NULL, &nv50->state.vtxbuf);
-	if (nv50->state.vtxattr)
-		so_ref(NULL, &nv50->state.vtxattr);
+	for (i = 0; i < 64; i++) {
+		if (!nv50->state.hw[i])
+			continue;
+		so_ref(NULL, &nv50->state.hw[i]);
+	}
 
 	draw_destroy(nv50->draw);
 
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 2cb5538c54..5fa3911114 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -123,27 +123,11 @@ nv50_surface(struct pipe_surface *pt)
 }
 
 struct nv50_state {
-	unsigned dirty;
+	struct nouveau_stateobj *hw[64];
+	uint64_t hw_dirty;
 
-	struct nouveau_stateobj *fb;
-	struct nouveau_stateobj *blend;
-	struct nouveau_stateobj *blend_colour;
-	struct nouveau_stateobj *zsa;
-	struct nouveau_stateobj *stencil_ref;
-	struct nouveau_stateobj *rast;
-	struct nouveau_stateobj *stipple;
-	struct nouveau_stateobj *scissor;
 	unsigned scissor_enabled;
-	struct nouveau_stateobj *viewport;
-	struct nouveau_stateobj *tsc_upload;
-	struct nouveau_stateobj *tic_upload;
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
-	struct nouveau_stateobj *vertprog;
-	struct nouveau_stateobj *fragprog;
-	struct nouveau_stateobj *geomprog;
-	struct nouveau_stateobj *fp_linkage;
-	struct nouveau_stateobj *gp_linkage;
-	struct nouveau_stateobj *vtxfmt;
 	struct nouveau_stateobj *vtxbuf;
 	struct nouveau_stateobj *vtxattr;
 	struct nouveau_stateobj *instbuf;
@@ -224,18 +208,23 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
 					 unsigned startInstance,
 					 unsigned instanceCount);
 extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
-extern void nv50_vbo_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
 
 /* nv50_clear.c */
 extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 		       const float *rgba, double depth, unsigned stencil);
 
 /* nv50_program.c */
-extern void nv50_vertprog_validate(struct nv50_context *nv50);
-extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_geomprog_validate(struct nv50_context *nv50);
-extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50);
 extern void nv50_program_destroy(struct nv50_context *nv50,
 				 struct nv50_program *p);
 
@@ -250,7 +239,7 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
 
 /* nv50_tex.c */
 extern void nv50_tex_relocs(struct nv50_context *);
-extern void nv50_tex_validate(struct nv50_context *);
+extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
 
 /* nv50_transfer.c */
 extern void
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2372cbbef6..50bb982076 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 	FREE(up);
 }
 
-void
+struct nouveau_stateobj *
 nv50_vertprog_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4301,11 +4301,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	so_data  (so, p->cfg.high_temp);
 	so_method(so, tesla, NV50TCL_VP_START_ID, 1);
 	so_data  (so, 0); /* program start offset */
-	so_ref(so, &nv50->state.vertprog);
-	so_ref(NULL, &so);
+	return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fragprog_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4337,11 +4336,10 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	so_data  (so, p->cfg.regs[3]);
 	so_method(so, tesla, NV50TCL_FP_START_ID, 1);
 	so_data  (so, 0); /* program start offset */
-	so_ref(so, &nv50->state.fragprog);
-	so_ref(NULL, &so);
+	return so;
 }
 
-void
+struct nouveau_stateobj *
 nv50_geomprog_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4373,8 +4371,7 @@ nv50_geomprog_validate(struct nv50_context *nv50)
 	so_data  (so, p->cfg.vert_count);
 	so_method(so, tesla, NV50TCL_GP_START_ID, 1);
 	so_data  (so, 0);
-	so_ref(so, &nv50->state.geomprog);
-	so_ref(NULL, &so);
+	return so;
 }
 
 static uint32_t
@@ -4454,7 +4451,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
 	return mid;
 }
 
-void
+struct nouveau_stateobj *
 nv50_fp_linkage_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4580,8 +4577,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
 	so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
 	so_data  (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
 
-	so_ref(so, &nv50->state.fp_linkage);
-	so_ref(NULL, &so);
+	return so;
 }
 
 static int
@@ -4615,7 +4611,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m,
 	return m;
 }
 
-void
+struct nouveau_stateobj *
 nv50_gp_linkage_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4625,10 +4621,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
 	uint32_t map[16];
 	int m = 0;
 
-	if (!gp) {
-		so_ref(NULL, &nv50->state.gp_linkage);
-		return;
-	}
+	if (!gp)
+		return NULL;
 	memset(map, 0, sizeof(map));
 
 	m = construct_vp_gp_mapping(map, m, vp, gp);
@@ -4646,8 +4640,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
 	so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
 	so_datap (so, map, m);
 
-	so_ref(so, &nv50->state.gp_linkage);
-	so_ref(NULL, &so);
+	return so;
 }
 
 void
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index eed6031eaf..b530de64b6 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -215,9 +215,9 @@ nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
 	/* Our vtxbuf got mapped, it can no longer be considered part of current
 	 * state, remove it to avoid emitting reloc markers.
 	 */
-	if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
+	if (ctx && ctx->state.hw[17] && so_bo_is_reloc(ctx->state.hw[17],
 			nouveau_bo(pb))) {
-		so_ref(NULL, &ctx->state.vtxbuf);
+		so_ref(NULL, &ctx->state.hw[17]);
 		ctx->dirty |= NV50_NEW_ARRAYS;
 	}
 
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 0d30957ee3..abdb9a55c8 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -25,8 +25,8 @@
 #include "nv50_context.h"
 #include "nouveau/nouveau_stateobj.h"
 
-static void
-nv50_state_validate_fb(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_fb(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_stateobj *so = so_new(32, 79, 18);
@@ -169,10 +169,8 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 
 	/* we set scissors to framebuffer size when they're 'turned off' */
 	nv50->dirty |= NV50_NEW_SCISSOR;
-	so_ref(NULL, &nv50->state.scissor);
-
-	so_ref(so, &nv50->state.fb);
-	so_ref(NULL, &so);
+	so_ref(NULL, &nv50->state.hw[12]);
+	return so;
 }
 
 static void
@@ -199,261 +197,251 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	}
 }
 
-static void
-nv50_state_emit(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_blend(struct nv50_context *nv50)
 {
-	struct nv50_screen *screen = nv50->screen;
-	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_stateobj *so = NULL;
+	so_ref(nv50->blend->so, &so);
+	return so;
+}
 
-	/* XXX: this is racy for multiple contexts active on separate
-	 * threads.
-	 */
-	if (screen->cur_ctx != nv50) {
-		if (nv50->state.fb)
-			nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
-		if (nv50->state.blend)
-			nv50->state.dirty |= NV50_NEW_BLEND;
-		if (nv50->state.zsa)
-			nv50->state.dirty |= NV50_NEW_ZSA;
-		if (nv50->state.vertprog)
-			nv50->state.dirty |= NV50_NEW_VERTPROG;
-		if (nv50->state.fragprog)
-			nv50->state.dirty |= NV50_NEW_FRAGPROG;
-		if (nv50->state.geomprog)
-			nv50->state.dirty |= NV50_NEW_GEOMPROG;
-		if (nv50->state.rast)
-			nv50->state.dirty |= NV50_NEW_RASTERIZER;
-		if (nv50->state.blend_colour)
-			nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
-		if (nv50->state.stencil_ref)
-			nv50->state.dirty |= NV50_NEW_STENCIL_REF;
-		if (nv50->state.stipple)
-			nv50->state.dirty |= NV50_NEW_STIPPLE;
-		if (nv50->state.scissor)
-			nv50->state.dirty |= NV50_NEW_SCISSOR;
-		if (nv50->state.viewport)
-			nv50->state.dirty |= NV50_NEW_VIEWPORT;
-		if (nv50->state.tsc_upload)
-			nv50->state.dirty |= NV50_NEW_SAMPLER;
-		if (nv50->state.tic_upload)
-			nv50->state.dirty |= NV50_NEW_TEXTURE;
-		if (nv50->state.vtxfmt && nv50->state.vtxbuf)
-			nv50->state.dirty |= NV50_NEW_ARRAYS;
-		screen->cur_ctx = nv50;
-	}
+static struct nouveau_stateobj *
+validate_zsa(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so = NULL;
+	so_ref(nv50->zsa->so, &so);
+	return so;
+}
 
-	if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
-		so_emit(chan, nv50->state.fb);
-	if (nv50->state.dirty & NV50_NEW_BLEND)
-		so_emit(chan, nv50->state.blend);
-	if (nv50->state.dirty & NV50_NEW_ZSA)
-		so_emit(chan, nv50->state.zsa);
-	if (nv50->state.dirty & NV50_NEW_VERTPROG)
-		so_emit(chan, nv50->state.vertprog);
-	if (nv50->state.dirty & NV50_NEW_FRAGPROG)
-		so_emit(chan, nv50->state.fragprog);
-	if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
-		so_emit(chan, nv50->state.geomprog);
-	if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-				 NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-		so_emit(chan, nv50->state.fp_linkage);
-	if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
-	    && nv50->state.gp_linkage)
-		so_emit(chan, nv50->state.gp_linkage);
-	if (nv50->state.dirty & NV50_NEW_RASTERIZER)
-		so_emit(chan, nv50->state.rast);
-	if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
-		so_emit(chan, nv50->state.blend_colour);
-	if (nv50->state.dirty & NV50_NEW_STENCIL_REF)
-		so_emit(chan, nv50->state.stencil_ref);
-	if (nv50->state.dirty & NV50_NEW_STIPPLE)
-		so_emit(chan, nv50->state.stipple);
-	if (nv50->state.dirty & NV50_NEW_SCISSOR)
-		so_emit(chan, nv50->state.scissor);
-	if (nv50->state.dirty & NV50_NEW_VIEWPORT)
-		so_emit(chan, nv50->state.viewport);
-	if (nv50->state.dirty & NV50_NEW_SAMPLER)
-		so_emit(chan, nv50->state.tsc_upload);
-	if (nv50->state.dirty & NV50_NEW_TEXTURE)
-		so_emit(chan, nv50->state.tic_upload);
-	if (nv50->state.dirty & NV50_NEW_ARRAYS) {
-		so_emit(chan, nv50->state.vtxfmt);
-		so_emit(chan, nv50->state.vtxbuf);
-		if (nv50->state.vtxattr)
-			so_emit(chan, nv50->state.vtxattr);
-	}
-	nv50->state.dirty = 0;
+static struct nouveau_stateobj *
+validate_rast(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so = NULL;
+	so_ref(nv50->rasterizer->so, &so);
+	return so;
 }
 
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
+static struct nouveau_stateobj *
+validate_blend_colour(struct nv50_context *nv50)
 {
-	struct nv50_context *nv50 = chan->user_private;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so = so_new(1, 4, 0);
+
+	so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+	so_data  (so, fui(nv50->blend_colour.color[0]));
+	so_data  (so, fui(nv50->blend_colour.color[1]));
+	so_data  (so, fui(nv50->blend_colour.color[2]));
+	so_data  (so, fui(nv50->blend_colour.color[3]));
+	return so;
+}
 
-	nv50_tex_relocs(nv50);
+static struct nouveau_stateobj *
+validate_stencil_ref(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so = so_new(2, 2, 0);
 
-	so_emit_reloc_markers(chan, nv50->state.fb);
-	so_emit_reloc_markers(chan, nv50->state.vertprog);
-	so_emit_reloc_markers(chan, nv50->state.fragprog);
-	so_emit_reloc_markers(chan, nv50->state.vtxbuf);
-	so_emit_reloc_markers(chan, nv50->screen->static_init);
+	so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
+	so_data  (so, nv50->stencil_ref.ref_value[0]);
+	so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
+	so_data  (so, nv50->stencil_ref.ref_value[1]);
+	return so;
+}
 
-	if (nv50->state.instbuf)
-		so_emit_reloc_markers(chan, nv50->state.instbuf);
+static struct nouveau_stateobj *
+validate_stipple(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so = so_new(1, 32, 0);
+	int i;
+
+	so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+	for (i = 0; i < 32; i++)
+		so_data(so, util_bswap32(nv50->stipple.stipple[i]));
+	return so;
 }
 
-boolean
-nv50_state_validate(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_scissor(struct nv50_context *nv50)
 {
+	struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
+	struct pipe_scissor_state *s = &nv50->scissor;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_stateobj *so;
-	unsigned i;
 
-	if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
-		nv50_state_validate_fb(nv50);
+	if (nv50->state.hw[12] &&
+	    (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
+		return NULL;
+	nv50->state.scissor_enabled = rast->scissor;
 
-	if (nv50->dirty & NV50_NEW_BLEND)
-		so_ref(nv50->blend->so, &nv50->state.blend);
+	so = so_new(1, 2, 0);
+	so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
+	if (nv50->state.scissor_enabled) {
+		so_data(so, (s->maxx << 16) | s->minx);
+		so_data(so, (s->maxy << 16) | s->miny);
+	} else {
+		so_data(so, (nv50->framebuffer.width << 16));
+		so_data(so, (nv50->framebuffer.height << 16));
+	}
 
-	if (nv50->dirty & NV50_NEW_ZSA)
-		so_ref(nv50->zsa->so, &nv50->state.zsa);
+	return so;
+}
 
-	if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
-		nv50_vertprog_validate(nv50);
+static struct nouveau_stateobj *
+validate_viewport(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so = so_new(5, 9, 0);
+
+	so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
+	so_data  (so, fui(nv50->viewport.translate[0]));
+	so_data  (so, fui(nv50->viewport.translate[1]));
+	so_data  (so, fui(nv50->viewport.translate[2]));
+	so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
+	so_data  (so, fui(nv50->viewport.scale[0]));
+	so_data  (so, fui(nv50->viewport.scale[1]));
+	so_data  (so, fui(nv50->viewport.scale[2]));
+
+	so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+	so_data  (so, 1);
+	/* 0x0000 = remove whole primitive only (xyz)
+	 * 0x1018 = remove whole primitive only (xy), clamp z
+	 * 0x1080 = clip primitive (xyz)
+	 * 0x1098 = clip primitive (xy), clamp z
+	 */
+	so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+	so_data  (so, 0x1080);
+	/* no idea what 0f90 does */
+	so_method(so, tesla, 0x0f90, 1);
+	so_data  (so, 0);
 
-	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
-		nv50_fragprog_validate(nv50);
+	return so;
+}
 
-	if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
-		nv50_geomprog_validate(nv50);
+static struct nouveau_stateobj *
+validate_sampler(struct nv50_context *nv50)
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_stateobj *so;
+	unsigned nr = 0, i;
 
-	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
-			   NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
-		nv50_fp_linkage_validate(nv50);
+	for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+		nr += nv50->sampler_nr[i];
 
-	if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
-		nv50_gp_linkage_validate(nv50);
+	so = so_new(1 + 5 * PIPE_SHADER_TYPES,
+		    1 + 19 * PIPE_SHADER_TYPES + nr * 8,
+		    PIPE_SHADER_TYPES * 2);
 
-	if (nv50->dirty & NV50_NEW_RASTERIZER)
-		so_ref(nv50->rasterizer->so, &nv50->state.rast);
+	nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+	nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-	if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
-		so = so_new(1, 4, 0);
-		so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
-		so_data  (so, fui(nv50->blend_colour.color[0]));
-		so_data  (so, fui(nv50->blend_colour.color[1]));
-		so_data  (so, fui(nv50->blend_colour.color[2]));
-		so_data  (so, fui(nv50->blend_colour.color[3]));
-		so_ref(so, &nv50->state.blend_colour);
-		so_ref(NULL, &so);
-	}
-
-	if (nv50->dirty & NV50_NEW_STENCIL_REF) {
-		so = so_new(2, 2, 0);
-		so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
-		so_data  (so, nv50->stencil_ref.ref_value[0]);
-		so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
-		so_data  (so, nv50->stencil_ref.ref_value[1]);
-		so_ref(so, &nv50->state.stencil_ref);
-		so_ref(NULL, &so);
-	}
+	so_method(so, tesla, 0x1334, 1); /* flush TSC */
+	so_data  (so, 0);
 
-	if (nv50->dirty & NV50_NEW_STIPPLE) {
-		so = so_new(1, 32, 0);
-		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
-		for (i = 0; i < 32; i++)
-			so_data(so, util_bswap32(nv50->stipple.stipple[i]));
-		so_ref(so, &nv50->state.stipple);
-		so_ref(NULL, &so);
-	}
+	return so;
+}
 
-	if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
-		struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
-		struct pipe_scissor_state *s = &nv50->scissor;
+static struct nouveau_stateobj *
+validate_vtxbuf(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so = NULL;
+	so_ref(nv50->state.vtxbuf, &so);
+	return so;
+}
 
-		if (nv50->state.scissor &&
-		    (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
-			goto scissor_uptodate;
-		nv50->state.scissor_enabled = rast->scissor;
+static struct nouveau_stateobj *
+validate_vtxattr(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so = NULL;
+	so_ref(nv50->state.vtxattr, &so);
+	return so;
+}
 
-		so = so_new(1, 2, 0);
-		so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
-		if (nv50->state.scissor_enabled) {
-			so_data(so, (s->maxx << 16) | s->minx);
-			so_data(so, (s->maxy << 16) | s->miny);
-		} else {
-			so_data(so, (nv50->framebuffer.width << 16));
-			so_data(so, (nv50->framebuffer.height << 16));
-		}
-		so_ref(so, &nv50->state.scissor);
-		so_ref(NULL, &so);
-		nv50->state.dirty |= NV50_NEW_SCISSOR;
-	}
-scissor_uptodate:
-
-	if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
-		if (nv50->state.viewport &&
-		    !(nv50->dirty & NV50_NEW_VIEWPORT))
-			goto viewport_uptodate;
-
-		so = so_new(5, 9, 0);
-		so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
-		so_data  (so, fui(nv50->viewport.translate[0]));
-		so_data  (so, fui(nv50->viewport.translate[1]));
-		so_data  (so, fui(nv50->viewport.translate[2]));
-		so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
-		so_data  (so, fui(nv50->viewport.scale[0]));
-		so_data  (so, fui(nv50->viewport.scale[1]));
-		so_data  (so, fui(nv50->viewport.scale[2]));
-
-		so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
-		so_data  (so, 1);
-		/* 0x0000 = remove whole primitive only (xyz)
-		 * 0x1018 = remove whole primitive only (xy), clamp z
-		 * 0x1080 = clip primitive (xyz)
-		 * 0x1098 = clip primitive (xy), clamp z
-		 */
-		so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
-		so_data  (so, 0x1080);
-		/* no idea what 0f90 does */
-		so_method(so, tesla, 0x0f90, 1);
-		so_data  (so, 0);
+static struct state_validate {
+	struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
+	unsigned states;
+} validate_list[] = {
+	{ validate_fb             , NV50_NEW_FRAMEBUFFER                      },
+	{ validate_blend          , NV50_NEW_BLEND                            },
+	{ validate_zsa            , NV50_NEW_ZSA                              },
+	{ nv50_vertprog_validate  , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB  },
+	{ nv50_fragprog_validate  , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB  },
+	{ nv50_geomprog_validate  , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB  },
+	{ nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
+				    NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER   },
+	{ nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG     },
+	{ validate_rast           , NV50_NEW_RASTERIZER                       },
+	{ validate_blend_colour   , NV50_NEW_BLEND_COLOUR                     },
+	{ validate_stencil_ref    , NV50_NEW_STENCIL_REF                      },
+	{ validate_stipple        , NV50_NEW_STIPPLE                          },
+	{ validate_scissor        , NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER    },
+	{ validate_viewport       , NV50_NEW_VIEWPORT                         },
+	{ validate_sampler        , NV50_NEW_SAMPLER                          },
+	{ nv50_tex_validate       , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER       },
+	{ nv50_vbo_validate       , NV50_NEW_ARRAYS                           },
+	{ validate_vtxbuf         , NV50_NEW_ARRAYS                           },
+	{ validate_vtxattr        , NV50_NEW_ARRAYS                           },
+	{ NULL, 0 }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-		so_ref(so, &nv50->state.viewport);
-		so_ref(NULL, &so);
-		nv50->state.dirty |= NV50_NEW_VIEWPORT;
-	}
-viewport_uptodate:
+void
+nv50_state_flush_notify(struct nouveau_channel *chan)
+{
+	struct nv50_context *nv50 = chan->user_private;
 
-	if (nv50->dirty & NV50_NEW_SAMPLER) {
-		unsigned nr = 0;
+	nv50_tex_relocs(nv50);
 
-		for (i = 0; i < PIPE_SHADER_TYPES; ++i)
-			nr += nv50->sampler_nr[i];
+	so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+	so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+	so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+	so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+	so_emit_reloc_markers(chan, nv50->screen->static_init);
 
-		so = so_new(1 + 5 * PIPE_SHADER_TYPES,
-			    1 + 19 * PIPE_SHADER_TYPES + nr * 8,
-			    PIPE_SHADER_TYPES * 2);
+#if 0
+	if (nv50->state.instbuf)
+		so_emit_reloc_markers(chan, nv50->state.instbuf);
+#endif
+}
 
-		nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
-		nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+boolean
+nv50_state_validate(struct nv50_context *nv50)
+{
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	int i;
 
-		so_method(so, tesla, 0x1334, 1); /* flush TSC */
-		so_data  (so, 0);
+	if (nv50->screen->cur_ctx != nv50) {
+		for (i = 0; i < validate_list_len; i++) {
+			if (nv50->state.hw[i])
+				nv50->state.hw_dirty |= (1 << i);
+		}
 
-		so_ref(so, &nv50->state.tsc_upload);
-		so_ref(NULL, &so);
+		nv50->screen->cur_ctx = nv50;
 	}
 
-	if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
-		nv50_tex_validate(nv50);
+	for (i = 0; i < validate_list_len; i++) {
+		struct state_validate *validate = &validate_list[i];
+		struct nouveau_stateobj *so;
+
+		if (!(nv50->dirty & validate->states))
+			continue;
 
-	if (nv50->dirty & NV50_NEW_ARRAYS)
-		nv50_vbo_validate(nv50);
+		so = validate->func(nv50);
+		if (!so)
+			continue;
 
-	nv50->state.dirty |= nv50->dirty;
+		so_ref(so, &nv50->state.hw[i]);
+		so_ref(NULL, &so);
+		nv50->state.hw_dirty |= (1 << i);
+	}
 	nv50->dirty = 0;
-	nv50_state_emit(nv50);
+
+	while (nv50->state.hw_dirty) {
+		i = ffs(nv50->state.hw_dirty) - 1;
+		nv50->state.hw_dirty &= ~(1 << i);
+
+		so_emit(chan, nv50->state.hw[i]);
+	}
 
 	return TRUE;
 }
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index ed3440fcc1..4c48b12cd8 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -224,7 +224,7 @@ nv50_tex_relocs(struct nv50_context *nv50)
 	}
 }
 
-void
+struct nouveau_stateobj *
 nv50_tex_validate(struct nv50_context *nv50)
 {
 	struct nouveau_stateobj *so;
@@ -247,12 +247,11 @@ nv50_tex_validate(struct nv50_context *nv50)
 		so_ref(NULL, &so);
 
 		NOUVEAU_ERR("failed tex validate\n");
-		return;
+		return NULL;
 	}
 
 	so_method(so, tesla, 0x1330, 1); /* flush TIC */
 	so_data  (so, 0);
 
-	so_ref(so, &nv50->state.tic_upload);
-	so_ref(NULL, &so);
+	return so;
 }
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index c1dcb93b48..3e540013c4 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -792,7 +792,7 @@ nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
 	}
 }
 
-void
+struct nouveau_stateobj *
 nv50_vbo_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -801,7 +801,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
 
 	/* don't validate if Gallium took away our buffers */
 	if (nv50->vtxbuf_nr == 0)
-		return;
+		return NULL;
 	nv50->vbo_fifo = 0;
 
 	for (i = 0; i < nv50->vtxbuf_nr; ++i)
@@ -875,12 +875,11 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	}
 	nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
 
-	so_ref (vtxfmt, &nv50->state.vtxfmt);
 	so_ref (vtxbuf, &nv50->state.vtxbuf);
 	so_ref (vtxattr, &nv50->state.vtxattr);
 	so_ref (NULL, &vtxbuf);
-	so_ref (NULL, &vtxfmt);
 	so_ref (NULL, &vtxattr);
+	return vtxfmt;
 }
 
 typedef void (*pfn_push)(struct nouveau_channel *, void *);
-- 
cgit v1.2.3


From e1d015fe222a3f6f942426c39fb1a17188df8d57 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 14:43:21 +1000
Subject: nv50: ensure enough room for state changes in current pushbuf

Also allows the nv50_state_validate() caller to request the minimum amount
of pushbuf space that it itself requires. Not all callers use this accurately
yet, but the simple cases are now accounted for.

Rendering will also be dropped on the floor if validate fails now.
---
 src/gallium/drivers/nv50/nv50_clear.c          |  2 +-
 src/gallium/drivers/nv50/nv50_context.h        |  2 +-
 src/gallium/drivers/nv50/nv50_state_validate.c | 39 ++++++++++++++++++--------
 src/gallium/drivers/nv50/nv50_vbo.c            | 12 +++++---
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
index e0b2d2880b..8afc95c9fc 100644
--- a/src/gallium/drivers/nv50/nv50_clear.c
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -36,7 +36,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
 	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
 	unsigned mode = 0, i;
 
-	if (!nv50_state_validate(nv50))
+	if (!nv50_state_validate(nv50, 64))
 		return;
 
 	if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 5fa3911114..ef0e99f0b1 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -229,7 +229,7 @@ extern void nv50_program_destroy(struct nv50_context *nv50,
 				 struct nv50_program *p);
 
 /* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50);
+extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
 extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index abdb9a55c8..b01a3d87de 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -405,19 +405,11 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
 }
 
 boolean
-nv50_state_validate(struct nv50_context *nv50)
+nv50_state_validate(struct nv50_context *nv50, unsigned nr_dwords)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	int i;
-
-	if (nv50->screen->cur_ctx != nv50) {
-		for (i = 0; i < validate_list_len; i++) {
-			if (nv50->state.hw[i])
-				nv50->state.hw_dirty |= (1 << i);
-		}
-
-		nv50->screen->cur_ctx = nv50;
-	}
+	unsigned nr_relocs = 0;
+	int ret, i;
 
 	for (i = 0; i < validate_list_len; i++) {
 		struct state_validate *validate = &validate_list[i];
@@ -430,12 +422,37 @@ nv50_state_validate(struct nv50_context *nv50)
 		if (!so)
 			continue;
 
+		nr_dwords += (so->total + so->cur);
+		nr_relocs += so->cur_reloc;
+
 		so_ref(so, &nv50->state.hw[i]);
 		so_ref(NULL, &so);
 		nv50->state.hw_dirty |= (1 << i);
 	}
 	nv50->dirty = 0;
 
+	if (nv50->screen->cur_ctx != nv50) {
+		for (i = 0; i < validate_list_len; i++) {
+			if (!nv50->state.hw[i] ||
+			    (nv50->state.hw_dirty & (1 << i)))
+				continue;
+
+			nr_dwords += (nv50->state.hw[i]->total +
+				      nv50->state.hw[i]->cur);
+			nr_relocs += nv50->state.hw[i]->cur_reloc;
+			nv50->state.hw_dirty |= (1 << i);
+		}
+
+		nv50->screen->cur_ctx = nv50;
+	}
+
+	ret = MARK_RING(chan, nr_dwords, nr_relocs);
+	if (ret) {
+		debug_printf("MARK_RING(%d, %d) failed: %d\n",
+			     nr_dwords, nr_relocs, ret);
+		return FALSE;
+	}
+
 	while (nv50->state.hw_dirty) {
 		i = ffs(nv50->state.hw_dirty) - 1;
 		nv50->state.hw_dirty &= ~(1 << i);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 3e540013c4..ce2e479cfd 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -425,7 +425,8 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
 		nv50_upload_user_vbufs(nv50);
 
-	nv50_state_validate(nv50);
+	if (!nv50_state_validate(nv50, 0))
+		return;
 
 	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
 
@@ -477,7 +478,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	boolean ret;
 
-	nv50_state_validate(nv50);
+	if (!nv50_state_validate(nv50, 11))
+		return;
 
 	BEGIN_RING(chan, tesla, 0x142c, 1);
 	OUT_RING  (chan, 0);
@@ -633,7 +635,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
 		nv50_upload_user_vbufs(nv50);
 
-	nv50_state_validate(nv50);
+	if (!nv50_state_validate(nv50, 0))
+		return;
 
 	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
 
@@ -677,7 +680,8 @@ nv50_draw_elements(struct pipe_context *pipe,
 	struct pipe_screen *pscreen = pipe->screen;
 	void *map;
 	
-	nv50_state_validate(nv50);
+	if (!nv50_state_validate(nv50, 14))
+		return;
 
 	BEGIN_RING(chan, tesla, 0x142c, 1);
 	OUT_RING  (chan, 0);
-- 
cgit v1.2.3


From c926ed0e76bdbf842170f9396856e9cedb6e718f Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 14:55:54 +1000
Subject: nv50: always emit dummy relocs for all still-referenced buffers

---
 src/gallium/drivers/nv50/nv50_state_validate.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index b01a3d87de..12e427d38b 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -405,10 +405,10 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
 }
 
 boolean
-nv50_state_validate(struct nv50_context *nv50, unsigned nr_dwords)
+nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	unsigned nr_relocs = 0;
+	unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128;
 	int ret, i;
 
 	for (i = 0; i < validate_list_len; i++) {
@@ -460,6 +460,12 @@ nv50_state_validate(struct nv50_context *nv50, unsigned nr_dwords)
 		so_emit(chan, nv50->state.hw[i]);
 	}
 
+	/* Yes, really, we need to do this.  If a buffer that is referenced
+	 * on the hardware isn't part of changed state above, without doing
+	 * this the kernel is given no clue that the buffer is being used
+	 * still.  This can cause all sorts of fun issues.
+	 */
+	nv50_state_flush_notify(chan);
 	return TRUE;
 }
 
-- 
cgit v1.2.3


From 3f804b0f199680e981c70376f55ecc5fe8d16aec Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 14:59:28 +1000
Subject: nv50: remove horrific abuse of nouveau_channel.flush_notify

Really though, using this at all is just not a good idea in the 3D driver.
I'm almost certain the hardware will not like a reloc appearing between
begin()/end(). Anyway, this is still better than before; more fixes
to come.
---
 src/gallium/drivers/nv50/nv50_context.c |  1 -
 src/gallium/drivers/nv50/nv50_vbo.c     | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index eff012c14d..0eb42f323f 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -93,7 +93,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
 	nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	screen->base.channel->user_private = nv50;
-	screen->base.channel->flush_notify = nv50_state_flush_notify;
 
 	nv50_init_surface_functions(nv50);
 	nv50_init_state_functions(nv50);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index ce2e479cfd..215dfb74fc 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -427,6 +427,7 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 
 	if (!nv50_state_validate(nv50, 0))
 		return;
+	chan->flush_notify = nv50_state_flush_notify;
 
 	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
 
@@ -464,6 +465,8 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 		OUT_RING  (chan, 0);
 	}
+
+	chan->flush_notify = NULL;
 	nv50_unmap_vbufs(nv50);
 
 	so_ref(NULL, &nv50->state.instbuf);
@@ -480,6 +483,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 
 	if (!nv50_state_validate(nv50, 11))
 		return;
+	chan->flush_notify = nv50_state_flush_notify;
 
 	BEGIN_RING(chan, tesla, 0x142c, 1);
 	OUT_RING  (chan, 0);
@@ -500,6 +504,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
+	chan->flush_notify = NULL;
 	nv50_unmap_vbufs(nv50);
 
         /* XXX: not sure what to do if ret != TRUE: flush and retry?
@@ -637,6 +642,7 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 
 	if (!nv50_state_validate(nv50, 0))
 		return;
+	chan->flush_notify = nv50_state_flush_notify;
 
 	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
 
@@ -664,6 +670,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 		OUT_RING  (chan, 0);
 	}
+
+	chan->flush_notify = NULL;
 	nv50_unmap_vbufs(nv50);
 
 	so_ref(NULL, &nv50->state.instbuf);
@@ -682,6 +690,7 @@ nv50_draw_elements(struct pipe_context *pipe,
 	
 	if (!nv50_state_validate(nv50, 14))
 		return;
+	chan->flush_notify = nv50_state_flush_notify;
 
 	BEGIN_RING(chan, tesla, 0x142c, 1);
 	OUT_RING  (chan, 0);
@@ -720,6 +729,7 @@ nv50_draw_elements(struct pipe_context *pipe,
 
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
+	chan->flush_notify = NULL;
 }
 
 static INLINE boolean
-- 
cgit v1.2.3


From 2a84db24b33993844ba5e5c036edca78718d2425 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 3 Mar 2010 15:02:53 +1000
Subject: nv50: remove pre_pipebuffer_map hack....

---
 src/gallium/drivers/nv50/nv50_screen.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index b530de64b6..2a1b64ffdc 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -202,28 +202,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 	FREE(screen);
 }
 
-static int
-nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
-	unsigned usage)
-{
-	struct nv50_screen *screen = nv50_screen(pscreen);
-	struct nv50_context *ctx = screen->cur_ctx;
-
-	if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
-		return 0;
-
-	/* Our vtxbuf got mapped, it can no longer be considered part of current
-	 * state, remove it to avoid emitting reloc markers.
-	 */
-	if (ctx && ctx->state.hw[17] && so_bo_is_reloc(ctx->state.hw[17],
-			nouveau_bo(pb))) {
-		so_ref(NULL, &ctx->state.hw[17]);
-		ctx->dirty |= NV50_NEW_ARRAYS;
-	}
-
-	return 0;
-}
-
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -252,7 +230,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	pscreen->get_paramf = nv50_screen_get_paramf;
 	pscreen->is_format_supported = nv50_screen_is_format_supported;
 	pscreen->context_create = nv50_create;
-	screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
 
 	nv50_screen_init_miptree_functions(pscreen);
 	nv50_transfer_init_screen_functions(pscreen);
-- 
cgit v1.2.3


From 0e6af1fcaaca6be94a1a0aba287a569299e27295 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 4 Mar 2010 09:52:47 +1000
Subject: nv50: avoid unnecessary vp/gp/fp state changes

---
 src/gallium/drivers/nv50/nv50_program.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 50bb982076..c857816b31 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
+	if (!(nv50->dirty & NV50_NEW_VERTPROG))
+		return NULL;
+
 	so = so_new(5, 7, 2);
 	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
 	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4320,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
+	if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+		return NULL;
+
 	so = so_new(6, 7, 2);
 	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
 	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4355,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
+	if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+		return NULL;
+
 	so = so_new(6, 7, 2);
 	so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
 	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
-- 
cgit v1.2.3


From 885d2eceb1517fa11be902ef826de3ba8620d085 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 4 Mar 2010 10:55:50 +1000
Subject: nv50: move magic 0x142c stuff to after state emission

---
 src/gallium/drivers/nv50/nv50_state_validate.c |  9 ++++++++-
 src/gallium/drivers/nv50/nv50_vbo.c            | 10 ----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 12e427d38b..0f27605372 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -408,7 +408,8 @@ boolean
 nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
 	int ret, i;
 
 	for (i = 0; i < validate_list_len; i++) {
@@ -466,6 +467,12 @@ nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
 	 * still.  This can cause all sorts of fun issues.
 	 */
 	nv50_state_flush_notify(chan);
+
+	/* No idea.. */
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, tesla, 0x142c, 1);
+	OUT_RING  (chan, 0);
 	return TRUE;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 215dfb74fc..c099afe212 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -485,11 +485,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 		return;
 	chan->flush_notify = nv50_state_flush_notify;
 
-	BEGIN_RING(chan, tesla, 0x142c, 1);
-	OUT_RING  (chan, 0);
-	BEGIN_RING(chan, tesla, 0x142c, 1);
-	OUT_RING  (chan, 0);
-
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
 
@@ -692,11 +687,6 @@ nv50_draw_elements(struct pipe_context *pipe,
 		return;
 	chan->flush_notify = nv50_state_flush_notify;
 
-	BEGIN_RING(chan, tesla, 0x142c, 1);
-	OUT_RING  (chan, 0);
-	BEGIN_RING(chan, tesla, 0x142c, 1);
-	OUT_RING  (chan, 0);
-
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
 
-- 
cgit v1.2.3


From 2b8c0cf16959c07341309484da41b2e9a17e5266 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 4 Mar 2010 11:13:51 +1000
Subject: nv50: make use of scissor enable/disable method

---
 src/gallium/drivers/nv50/nv50_context.h        |  1 -
 src/gallium/drivers/nv50/nv50_screen.c         |  4 ----
 src/gallium/drivers/nv50/nv50_state.c          |  5 ++++-
 src/gallium/drivers/nv50/nv50_state_validate.c | 23 ++++-------------------
 4 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index ef0e99f0b1..973fdd23a2 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -126,7 +126,6 @@ struct nv50_state {
 	struct nouveau_stateobj *hw[64];
 	uint64_t hw_dirty;
 
-	unsigned scissor_enabled;
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
 	struct nouveau_stateobj *vtxbuf;
 	struct nouveau_stateobj *vtxattr;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 2a1b64ffdc..114ae9b386 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -485,10 +485,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
 	so_data  (so, 1);
 
-	/* activate first scissor rectangle */
-	so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
-	so_data  (so, 1);
-
 	so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
 	so_data  (so, 1); /* default edgeflag to TRUE */
 
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index ffbf3473a1..b0e5552eff 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -302,7 +302,7 @@ static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
 			     const struct pipe_rasterizer_state *cso)
 {
-	struct nouveau_stateobj *so = so_new(15, 21, 0);
+	struct nouveau_stateobj *so = so_new(16, 22, 0);
 	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
 	struct nv50_rasterizer_stateobj *rso =
 		CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -314,6 +314,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
 	 * 	- point_sprite / sprite_coord_mode
 	 */
 
+	so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
+	so_data  (so, cso->scissor);
+
 	so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
 	so_data  (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
 				       NV50TCL_SHADE_MODEL_SMOOTH);
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 0f27605372..2aaee31c0e 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -167,9 +167,6 @@ validate_fb(struct nv50_context *nv50)
 	so_data  (so, w << 16);
 	so_data  (so, h << 16);
 
-	/* we set scissors to framebuffer size when they're 'turned off' */
-	nv50->dirty |= NV50_NEW_SCISSOR;
-	so_ref(NULL, &nv50->state.hw[12]);
 	return so;
 }
 
@@ -264,26 +261,14 @@ validate_stipple(struct nv50_context *nv50)
 static struct nouveau_stateobj *
 validate_scissor(struct nv50_context *nv50)
 {
-	struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
-	struct pipe_scissor_state *s = &nv50->scissor;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
+        struct pipe_scissor_state *s = &nv50->scissor;
 	struct nouveau_stateobj *so;
 
-	if (nv50->state.hw[12] &&
-	    (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
-		return NULL;
-	nv50->state.scissor_enabled = rast->scissor;
-
 	so = so_new(1, 2, 0);
 	so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
-	if (nv50->state.scissor_enabled) {
-		so_data(so, (s->maxx << 16) | s->minx);
-		so_data(so, (s->maxy << 16) | s->miny);
-	} else {
-		so_data(so, (nv50->framebuffer.width << 16));
-		so_data(so, (nv50->framebuffer.height << 16));
-	}
-
+	so_data  (so, (s->maxx << 16) | s->minx);
+	so_data  (so, (s->maxy << 16) | s->miny);
 	return so;
 }
 
@@ -374,7 +359,7 @@ struct state_validate {
 	{ validate_blend_colour   , NV50_NEW_BLEND_COLOUR                     },
 	{ validate_stencil_ref    , NV50_NEW_STENCIL_REF                      },
 	{ validate_stipple        , NV50_NEW_STIPPLE                          },
-	{ validate_scissor        , NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER    },
+	{ validate_scissor        , NV50_NEW_SCISSOR                          },
 	{ validate_viewport       , NV50_NEW_VIEWPORT                         },
 	{ validate_sampler        , NV50_NEW_SAMPLER                          },
 	{ nv50_tex_validate       , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER       },
-- 
cgit v1.2.3


From a527716e81a81f73c73d3744b4803c27a053ade2 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 4 Mar 2010 13:47:41 +1000
Subject: nv50: rip out inline push draw paths temporarily

---
 src/gallium/drivers/nv50/nv50_context.h |   2 -
 src/gallium/drivers/nv50/nv50_vbo.c     | 561 +-------------------------------
 2 files changed, 12 insertions(+), 551 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 973fdd23a2..15f33fa4f4 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -163,8 +163,6 @@ struct nv50_context {
 	unsigned sampler_nr[PIPE_SHADER_TYPES];
 	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
-
-	uint16_t vbo_fifo;
 };
 
 static INLINE struct nv50_context *
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index c099afe212..bab9530852 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -27,18 +27,6 @@
 
 #include "nv50_context.h"
 
-static boolean
-nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
-
-static boolean
-nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
-
 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
 
 static INLINE unsigned
@@ -162,61 +150,6 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 	return (hw_type | hw_size);
 }
 
-/* For instanced drawing from user buffers, hitting the FIFO repeatedly
- * with the same vertex data is probably worse than uploading all data.
- */
-static boolean
-nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
-{
-	struct nv50_screen *nscreen = nv50->screen;
-	struct pipe_screen *pscreen = &nscreen->base.base;
-	struct pipe_buffer *buf = nscreen->strm_vbuf[i];
-	struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-	uint8_t *src;
-	unsigned size = align(vb->buffer->size, 4096);
-
-	if (buf && buf->size < size)
-		pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
-
-	if (!nscreen->strm_vbuf[i]) {
-		nscreen->strm_vbuf[i] = pipe_buffer_create(
-			pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
-		buf = nscreen->strm_vbuf[i];
-	}
-
-	src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
-	if (!src)
-		return FALSE;
-	src += vb->buffer_offset;
-
-	size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
-	if (vb->buffer_offset + size > vb->buffer->size)
-		size = vb->buffer->size - vb->buffer_offset;
-
-	pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
-	pipe_buffer_unmap(pscreen, vb->buffer);
-
-	vb->buffer = buf; /* don't pipe_reference, this is a private copy */
-	return TRUE;
-}
-
-static void
-nv50_upload_user_vbufs(struct nv50_context *nv50)
-{
-	unsigned i;
-
-	if (nv50->vbo_fifo)
-		nv50->dirty |= NV50_NEW_ARRAYS;
-	if (!(nv50->dirty & NV50_NEW_ARRAYS))
-		return;
-
-	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-		if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
-			continue;
-		nv50_upload_vtxbuf(nv50, i);
-	}
-}
-
 static void
 nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
 {
@@ -257,36 +190,6 @@ nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
 	}
 }
 
-static unsigned
-init_per_instance_arrays_immd(struct nv50_context *nv50,
-			      unsigned startInstance,
-			      unsigned pos[16], unsigned step[16])
-{
-	struct nouveau_bo *bo;
-	unsigned i, b, count = 0;
-
-	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
-		if (!nv50->vtxelt->pipe[i].instance_divisor)
-			continue;
-		++count;
-		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
-
-		pos[i] = nv50->vtxelt->pipe[i].src_offset +
-			nv50->vtxbuf[b].buffer_offset +
-			startInstance * nv50->vtxbuf[b].stride;
-		step[i] = startInstance %
-			nv50->vtxelt->pipe[i].instance_divisor;
-
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-		if (!bo->map)
-			nouveau_bo_map(bo, NOUVEAU_BO_RD);
-
-		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
-	}
-
-	return count;
-}
-
 static unsigned
 init_per_instance_arrays(struct nv50_context *nv50,
 			 unsigned startInstance,
@@ -299,10 +202,6 @@ init_per_instance_arrays(struct nv50_context *nv50,
 	unsigned i, b, count = 0, num_elements = nv50->vtxelt->num_elements;
 	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 
-	if (nv50->vbo_fifo)
-		return init_per_instance_arrays_immd(nv50, startInstance,
-						     pos, step);
-
 	so = so_new(num_elements, num_elements * 2, num_elements * 2);
 
 	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
@@ -338,28 +237,6 @@ init_per_instance_arrays(struct nv50_context *nv50,
 	return count;
 }
 
-static void
-step_per_instance_arrays_immd(struct nv50_context *nv50,
-			      unsigned pos[16], unsigned step[16])
-{
-	struct nouveau_bo *bo;
-	unsigned i, b;
-
-	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
-		if (!nv50->vtxelt->pipe[i].instance_divisor)
-			continue;
-		if (++step[i] != nv50->vtxelt->pipe[i].instance_divisor)
-			continue;
-		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-		step[i] = 0;
-		pos[i] += nv50->vtxbuf[b].stride;
-
-		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
-	}
-}
-
 static void
 step_per_instance_arrays(struct nv50_context *nv50,
 			 unsigned pos[16], unsigned step[16])
@@ -371,11 +248,6 @@ step_per_instance_arrays(struct nv50_context *nv50,
 	unsigned i, b, num_elements = nv50->vtxelt->num_elements;
 	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 
-	if (nv50->vbo_fifo) {
-		step_per_instance_arrays_immd(nv50, pos, step);
-		return;
-	}
-
 	so = so_new(num_elements, num_elements * 2, num_elements * 2);
 
 	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
@@ -401,16 +273,6 @@ step_per_instance_arrays(struct nv50_context *nv50,
 	so_emit(chan, nv50->state.instbuf);
 }
 
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
-        unsigned i;
-
-        for (i = 0; i < nv50->vtxbuf_nr; ++i)
-                if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
-                        nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-}
-
 void
 nv50_draw_arrays_instanced(struct pipe_context *pipe,
 			   unsigned mode, unsigned start, unsigned count,
@@ -422,9 +284,6 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 	unsigned i, nz_divisors;
 	unsigned step[16], pos[16];
 
-	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-		nv50_upload_user_vbufs(nv50);
-
 	if (!nv50_state_validate(nv50, 0))
 		return;
 	chan->flush_notify = nv50_state_flush_notify;
@@ -437,14 +296,9 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
-
-	if (nv50->vbo_fifo)
-		nv50_push_arrays(nv50, start, count);
-	else {
-		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-		OUT_RING  (chan, start);
-		OUT_RING  (chan, count);
-	}
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+	OUT_RING  (chan, start);
+	OUT_RING  (chan, count);
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
@@ -454,20 +308,14 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-		if (nv50->vbo_fifo)
-			nv50_push_arrays(nv50, start, count);
-		else {
-			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-			OUT_RING  (chan, start);
-			OUT_RING  (chan, count);
-		}
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+		OUT_RING  (chan, start);
+		OUT_RING  (chan, count);
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 		OUT_RING  (chan, 0);
 	}
 
 	chan->flush_notify = NULL;
-	nv50_unmap_vbufs(nv50);
 
 	so_ref(NULL, &nv50->state.instbuf);
 }
@@ -479,7 +327,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	struct nv50_context *nv50 = nv50_context(pipe);
 	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	boolean ret;
 
 	if (!nv50_state_validate(nv50, 11))
 		return;
@@ -487,24 +334,13 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
-
-	if (nv50->vbo_fifo)
-		ret = nv50_push_arrays(nv50, start, count);
-	else {
-		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-		OUT_RING  (chan, start);
-		OUT_RING  (chan, count);
-		ret = TRUE;
-	}
+	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+	OUT_RING  (chan, start);
+	OUT_RING  (chan, count);
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
 	chan->flush_notify = NULL;
-	nv50_unmap_vbufs(nv50);
-
-        /* XXX: not sure what to do if ret != TRUE: flush and retry?
-         */
-        assert(ret);
 }
 
 static INLINE boolean
@@ -516,9 +352,6 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 
 	map += start;
 
-	if (nv50->vbo_fifo)
-		return nv50_push_elements_u08(nv50, map, count);
-
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
@@ -549,9 +382,6 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 
 	map += start;
 
-	if (nv50->vbo_fifo)
-		return nv50_push_elements_u16(nv50, map, count);
-
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
@@ -582,9 +412,6 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 
 	map += start;
 
-	if (nv50->vbo_fifo)
-		return nv50_push_elements_u32(nv50, map, count);
-
 	while (count) {
 		unsigned nr = count > 2047 ? 2047 : count;
 
@@ -632,9 +459,6 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 
 	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
 
-	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
-		nv50_upload_user_vbufs(nv50);
-
 	if (!nv50_state_validate(nv50, 0))
 		return;
 	chan->flush_notify = nv50_state_flush_notify;
@@ -667,7 +491,6 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 	}
 
 	chan->flush_notify = NULL;
-	nv50_unmap_vbufs(nv50);
 
 	so_ref(NULL, &nv50->state.instbuf);
 }
@@ -690,13 +513,13 @@ nv50_draw_elements(struct pipe_context *pipe,
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
 
-	if (!nv50->vbo_fifo && indexSize == 4) {
+	if (indexSize == 4) {
 		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
 		OUT_RING  (chan, count);
 		nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
 				       start << 2, count << 2);
 	} else
-	if (!nv50->vbo_fifo && indexSize == 2) {
+	if (indexSize == 2) {
 		unsigned vb_start = (start & ~1);
 		unsigned vb_end = (start + count + 1) & ~1;
 		unsigned dwords = (vb_end - vb_start) >> 1;
@@ -713,7 +536,6 @@ nv50_draw_elements(struct pipe_context *pipe,
 		map = pipe_buffer_map(pscreen, indexBuffer,
 				      PIPE_BUFFER_USAGE_CPU_READ);
 		nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-		nv50_unmap_vbufs(nv50);
 		pipe_buffer_unmap(pscreen, indexBuffer);
 	}
 
@@ -806,15 +628,8 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	/* don't validate if Gallium took away our buffers */
 	if (nv50->vtxbuf_nr == 0)
 		return NULL;
-	nv50->vbo_fifo = 0;
 
-	for (i = 0; i < nv50->vtxbuf_nr; ++i)
-		if (nv50->vtxbuf[i].stride &&
-		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
-			nv50->vbo_fifo = 0xffff;
-
-	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
-		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
+	assert(!NV50_USING_LOATHED_EDGEFLAG(nv50));
 
 	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
@@ -837,19 +652,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
 			so_method(vtxbuf, tesla,
 				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
 			so_data  (vtxbuf, 0);
-
-			nv50->vbo_fifo &= ~(1 << i);
 			continue;
 		}
 
-		if (nv50->vbo_fifo) {
-			so_data  (vtxfmt, hw |
-				  (ve->instance_divisor ? (1 << 4) : i));
-			so_method(vtxbuf, tesla,
-				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
-			so_data  (vtxbuf, 0);
-			continue;
-		}
 		so_data(vtxfmt, hw | i);
 
 		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
@@ -886,346 +691,4 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	return vtxfmt;
 }
 
-typedef void (*pfn_push)(struct nouveau_channel *, void *);
-
-struct nv50_vbo_emitctx
-{
-	pfn_push push[16];
-	uint8_t *map[16];
-	unsigned stride[16];
-	unsigned nr_ve;
-	unsigned vtx_dwords;
-	unsigned vtx_max;
-
-	float edgeflag;
-	unsigned ve_edgeflag;
-};
 
-static INLINE void
-emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
-{
-	unsigned i;
-
-	for (i = 0; i < emit->nr_ve; ++i) {
-		emit->push[i](chan, emit->map[i]);
-		emit->map[i] += emit->stride[i];
-	}
-}
-
-static INLINE void
-emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
-	 uint32_t vi)
-{
-	unsigned i;
-
-	for (i = 0; i < emit->nr_ve; ++i)
-		emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
-}
-
-static INLINE boolean
-nv50_map_vbufs(struct nv50_context *nv50)
-{
-	int i;
-
-	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
-		struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
-		unsigned size = vb->stride * (vb->max_index + 1) + 16;
-
-		if (nouveau_bo(vb->buffer)->map)
-			continue;
-
-		size = vb->stride * (vb->max_index + 1) + 16;
-		size = MIN2(size, vb->buffer->size);
-		if (!size)
-			size = vb->buffer->size;
-
-		if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
-					 0, size, NOUVEAU_BO_RD))
-			break;
-	}
-
-	if (i == nv50->vtxbuf_nr)
-		return TRUE;
-	for (; i >= 0; --i)
-		nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
-	return FALSE;
-}
-
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
-{
-	uint32_t *v = data;
-
-	OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
-{
-	uint32_t *v = data;
-
-	OUT_RING(chan, v[0]);
-	OUT_RING(chan, v[1]);
-}
-
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
-{
-	uint32_t *v = data;
-
-	OUT_RING(chan, v[0]);
-	OUT_RING(chan, v[1]);
-	OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
-{
-	uint32_t *v = data;
-
-	OUT_RING(chan, v[0]);
-	OUT_RING(chan, v[1]);
-	OUT_RING(chan, v[2]);
-	OUT_RING(chan, v[3]);
-}
-
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
-	uint16_t *v = data;
-
-	OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
-	uint16_t *v = data;
-
-	OUT_RING(chan, (v[1] << 16) | v[0]);
-	OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
-	uint8_t *v = data;
-
-	OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
-{
-	uint8_t *v = data;
-
-	OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
-
-static boolean
-emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
-	     unsigned start)
-{
-	unsigned i;
-
-	if (nv50_map_vbufs(nv50) == FALSE)
-		return FALSE;
-
-	emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
-
-	emit->edgeflag = 0.5f;
-	emit->nr_ve = 0;
-	emit->vtx_dwords = 0;
-
-	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
-		struct pipe_vertex_element *ve;
-		struct pipe_vertex_buffer *vb;
-		unsigned n, size, nr_components;
-		const struct util_format_description *desc;
-
-		ve = &nv50->vtxelt->pipe[i];
-		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
-		if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
-			continue;
-		n = emit->nr_ve++;
-
-		emit->stride[n] = vb->stride;
-		emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
-			vb->buffer_offset +
-			(start * vb->stride + ve->src_offset);
-
-		desc = util_format_description(ve->src_format);
-		assert(desc);
-
-		size = util_format_get_component_bits(
-			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
-		nr_components = util_format_get_nr_components(ve->src_format);
-
-		assert(nr_components > 0 && nr_components <= 4);
-
-		/* It shouldn't be necessary to push the implicit 1s
-		 * for case 3 and size 8 cases 1, 2, 3.
-		 */
-		switch (size) {
-		default:
-			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
-			return FALSE;
-		case 32:
-			switch (nr_components) {
-			case 1: emit->push[n] = emit_b32_1; break;
-			case 2: emit->push[n] = emit_b32_2; break;
-			case 3: emit->push[n] = emit_b32_3; break;
-			case 4: emit->push[n] = emit_b32_4; break;
-			}
-			emit->vtx_dwords += nr_components;
-			break;
-		case 16:
-			switch (nr_components) {
-			case 1: emit->push[n] = emit_b16_1; break;
-			case 2: emit->push[n] = emit_b32_1; break;
-			case 3: emit->push[n] = emit_b16_3; break;
-			case 4: emit->push[n] = emit_b32_2; break;
-			}
-			emit->vtx_dwords += (nr_components + 1) >> 1;
-			break;
-		case 8:
-			switch (nr_components) {
-			case 1: emit->push[n] = emit_b08_1; break;
-			case 2: emit->push[n] = emit_b16_1; break;
-			case 3: emit->push[n] = emit_b08_3; break;
-			case 4: emit->push[n] = emit_b32_1; break;
-			}
-			emit->vtx_dwords += 1;
-			break;
-		}
-	}
-
-	emit->vtx_max = 512 / emit->vtx_dwords;
-	if (emit->ve_edgeflag < 16)
-		emit->vtx_max = 1;
-
-	return TRUE;
-}
-
-static INLINE void
-set_edgeflag(struct nouveau_channel *chan,
-	     struct nouveau_grobj *tesla,
-	     struct nv50_vbo_emitctx *emit, uint32_t index)
-{
-	unsigned i = emit->ve_edgeflag;
-
-	if (i < 16) {
-		float f = *((float *)(emit->map[i] + index * emit->stride[i]));
-
-		if (emit->edgeflag != f) {
-			emit->edgeflag = f;
-
-			BEGIN_RING(chan, tesla, 0x15e4, 1);
-			OUT_RING  (chan, f ? 1 : 0);
-		}
-	}
-}
-
-static boolean
-nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
-{
-	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nv50_vbo_emitctx emit;
-
-	if (emit_prepare(nv50, &emit, start) == FALSE)
-		return FALSE;
-
-	while (count) {
-		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-	        dw = nr * emit.vtx_dwords;
-
-		set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-		for (i = 0; i < nr; ++i)
-			emit_vtx_next(chan, &emit);
-
-		count -= nr;
-	}
-
-	return TRUE;
-}
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
-{
-	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nv50_vbo_emitctx emit;
-
-	if (emit_prepare(nv50, &emit, 0) == FALSE)
-		return FALSE;
-
-	while (count) {
-		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-	        dw = nr * emit.vtx_dwords;
-
-		set_edgeflag(chan, tesla, &emit, *map);
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-		for (i = 0; i < nr; ++i)
-			emit_vtx(chan, &emit, *map++);
-
-		count -= nr;
-	}
-
-	return TRUE;
-}
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
-{
-	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nv50_vbo_emitctx emit;
-
-	if (emit_prepare(nv50, &emit, 0) == FALSE)
-		return FALSE;
-
-	while (count) {
-		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-	        dw = nr * emit.vtx_dwords;
-
-		set_edgeflag(chan, tesla, &emit, *map);
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-		for (i = 0; i < nr; ++i)
-			emit_vtx(chan, &emit, *map++);
-
-		count -= nr;
-	}
-
-	return TRUE;
-}
-
-static boolean
-nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
-{
-	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nv50_vbo_emitctx emit;
-
-	if (emit_prepare(nv50, &emit, 0) == FALSE)
-		return FALSE;
-
-	while (count) {
-		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
-	        dw = nr * emit.vtx_dwords;
-
-		set_edgeflag(chan, tesla, &emit, *map);
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
-		for (i = 0; i < nr; ++i)
-			emit_vtx(chan, &emit, *map++);
-
-		count -= nr;
-	}
-
-	return TRUE;
-}
-- 
cgit v1.2.3


From 621c4609f871d3e84d5f7fd98f11ee9ddf2a1f7c Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 5 Mar 2010 10:08:12 +1000
Subject: nv50: reimplement draw_arrays_instanced(), do draw_arrays() in terms
 of it

This makes draw_arrays()/draw_arrays_instanced() do the right thing and
not require the (probably broken anyway) flush_notify() usage.

It also fixes a potential bug in the behaviour of reading InstanceID from
shaders, where 0 should be read for non-instanced drawing; previously it
was possible to read non-0 ids if mixing instanced/non-instanced drawing.
---
 src/gallium/drivers/nv50/nv50_vbo.c | 278 +++++++++++++++++-------------------
 1 file changed, 134 insertions(+), 144 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index bab9530852..5a9728d7be 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -150,127 +150,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 	return (hw_type | hw_size);
 }
 
-static void
-nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
-{
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
-	float v[4];
-	enum pipe_format pf = nv50->vtxelt->pipe[i].src_format;
-	unsigned nr_components = util_format_get_nr_components(pf);
-
-	util_format_read_4f(pf, v, 0, data, 0, 0, 0, 1, 1);
-
-	switch (nr_components) {
-	case 4:
-		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
-		OUT_RINGf (chan, v[0]);
-		OUT_RINGf (chan, v[1]);
-		OUT_RINGf (chan, v[2]);
-		OUT_RINGf (chan, v[3]);
-		break;
-	case 3:
-		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
-		OUT_RINGf (chan, v[0]);
-		OUT_RINGf (chan, v[1]);
-		OUT_RINGf (chan, v[2]);
-		break;
-	case 2:
-		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
-		OUT_RINGf (chan, v[0]);
-		OUT_RINGf (chan, v[1]);
-		break;
-	case 1:
-		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
-		OUT_RINGf (chan, v[0]);
-		break;
-	default:
-		assert(0);
-		break;
-	}
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
-			 unsigned startInstance,
-			 unsigned pos[16], unsigned step[16])
-{
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
+struct instance {
 	struct nouveau_bo *bo;
-	struct nouveau_stateobj *so;
-	unsigned i, b, count = 0, num_elements = nv50->vtxelt->num_elements;
-	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	unsigned delta;
+	unsigned stride;
+	unsigned step;
+	unsigned divisor;
+};
 
-	so = so_new(num_elements, num_elements * 2, num_elements * 2);
+static void
+instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
+{
+	int i;
 
-	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
-		if (!nv50->vtxelt->pipe[i].instance_divisor)
-			continue;
-		++count;
-		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
+	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+		struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+		struct pipe_vertex_buffer *vb;
 
-		pos[i] = nv50->vtxelt->pipe[i].src_offset +
-			nv50->vtxbuf[b].buffer_offset +
-			startInstance * nv50->vtxbuf[b].stride;
+		a[i].divisor = ve->instance_divisor;
+		if (a[i].divisor) {
+			vb = &nv50->vtxbuf[ve->vertex_buffer_index];
 
-		if (!startInstance) {
-			step[i] = 0;
-			continue;
+			a[i].bo = nouveau_bo(vb->buffer);
+			a[i].stride = vb->stride;
+			a[i].step = first % a[i].divisor;
+			a[i].delta = vb->buffer_offset + ve->src_offset +
+				     (first * a[i].stride);
 		}
-		step[i] = startInstance %
-			nv50->vtxelt->pipe[i].instance_divisor;
-
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
-	}
-
-	if (count && startInstance) {
-		so_ref (so, &nv50->state.instbuf); /* for flush notify */
-		so_emit(chan, nv50->state.instbuf);
 	}
-	so_ref (NULL, &so);
-
-	return count;
 }
 
 static void
-step_per_instance_arrays(struct nv50_context *nv50,
-			 unsigned pos[16], unsigned step[16])
+instance_step(struct nv50_context *nv50, struct instance *a)
 {
+	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
-	struct nouveau_bo *bo;
-	struct nouveau_stateobj *so;
-	unsigned i, b, num_elements = nv50->vtxelt->num_elements;
-	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-	so = so_new(num_elements, num_elements * 2, num_elements * 2);
+	int i;
 
-	for (i = 0; i < nv50->vtxelt->num_elements; ++i) {
-		if (!nv50->vtxelt->pipe[i].instance_divisor)
+	for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+		if (!a[i].divisor)
 			continue;
-		b = nv50->vtxelt->pipe[i].vertex_buffer_index;
 
-		if (++step[i] == nv50->vtxelt->pipe[i].instance_divisor) {
-			step[i] = 0;
-			pos[i] += nv50->vtxbuf[b].stride;
+		BEGIN_RING(chan, tesla,
+			   NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+		OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+		OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+		if (++a[i].step == a[i].divisor) {
+			a[i].step = 0;
+			a[i].delta += a[i].stride;
 		}
-
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
 	}
-
-	so_ref (so, &nv50->state.instbuf); /* for flush notify */
-	so_ref (NULL, &so);
-
-	so_emit(chan, nv50->state.instbuf);
 }
 
 void
@@ -281,66 +212,43 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 	struct nv50_context *nv50 = nv50_context(pipe);
 	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	unsigned i, nz_divisors;
-	unsigned step[16], pos[16];
+	struct instance a[16];
+	unsigned prim = nv50_prim(mode);
 
-	if (!nv50_state_validate(nv50, 0))
+	instance_init(nv50, a, startInstance);
+	if (!nv50_state_validate(nv50, 10 + 16*3))
 		return;
-	chan->flush_notify = nv50_state_flush_notify;
-
-	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
 
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-	OUT_RING  (chan, nv50_prim(mode));
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-	OUT_RING  (chan, start);
-	OUT_RING  (chan, count);
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-	OUT_RING  (chan, 0);
-
-	for (i = 1; i < instanceCount; i++) {
-		if (nz_divisors) /* any non-zero array divisors ? */
-			step_per_instance_arrays(nv50, pos, step);
+	while (instanceCount--) {
+		if (AVAIL_RING(chan) < (7 + 16*3)) {
+			FIRE_RING(chan);
+			if (!nv50_state_validate(nv50, 7 + 16*3)) {
+				assert(0);
+				return;
+			}
+		}
+		instance_step(nv50, a);
 
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
+		OUT_RING  (chan, prim);
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
 		OUT_RING  (chan, start);
 		OUT_RING  (chan, count);
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 		OUT_RING  (chan, 0);
-	}
-
-	chan->flush_notify = NULL;
 
-	so_ref(NULL, &nv50->state.instbuf);
+		prim |= (1 << 28);
+	}
 }
 
 void
 nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 		 unsigned count)
 {
-	struct nv50_context *nv50 = nv50_context(pipe);
-	struct nouveau_channel *chan = nv50->screen->tesla->channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-	if (!nv50_state_validate(nv50, 11))
-		return;
-	chan->flush_notify = nv50_state_flush_notify;
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-	OUT_RING  (chan, nv50_prim(mode));
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-	OUT_RING  (chan, start);
-	OUT_RING  (chan, count);
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-	OUT_RING  (chan, 0);
-
-	chan->flush_notify = NULL;
+	nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
 }
 
 static INLINE boolean
@@ -442,6 +350,88 @@ nv50_draw_elements_inline(struct nv50_context *nv50,
 	}
 }
 
+static unsigned
+init_per_instance_arrays(struct nv50_context *nv50,
+			 unsigned startInstance,
+			 unsigned pos[16], unsigned step[16])
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	struct nouveau_bo *bo;
+	struct nouveau_stateobj *so;
+	unsigned i, b, count = 0;
+	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+
+	for (i = 0; i < nv50->vtxelt_nr; ++i) {
+		if (!nv50->vtxelt[i].instance_divisor)
+			continue;
+		++count;
+		b = nv50->vtxelt[i].vertex_buffer_index;
+
+		pos[i] = nv50->vtxelt[i].src_offset +
+			nv50->vtxbuf[b].buffer_offset +
+			startInstance * nv50->vtxbuf[b].stride;
+
+		if (!startInstance) {
+			step[i] = 0;
+			continue;
+		}
+		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
+
+		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	if (count && startInstance) {
+		so_ref (so, &nv50->state.instbuf); /* for flush notify */
+		so_emit(chan, nv50->state.instbuf);
+	}
+	so_ref (NULL, &so);
+
+	return count;
+}
+
+static void
+step_per_instance_arrays(struct nv50_context *nv50,
+			 unsigned pos[16], unsigned step[16])
+{
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	struct nouveau_bo *bo;
+	struct nouveau_stateobj *so;
+	unsigned i, b;
+	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+
+	for (i = 0; i < nv50->vtxelt_nr; ++i) {
+		if (!nv50->vtxelt[i].instance_divisor)
+			continue;
+		b = nv50->vtxelt[i].vertex_buffer_index;
+
+		if (++step[i] == nv50->vtxelt[i].instance_divisor) {
+			step[i] = 0;
+			pos[i] += nv50->vtxbuf[b].stride;
+		}
+
+		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+
+		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
+		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	so_ref (so, &nv50->state.instbuf); /* for flush notify */
+	so_ref (NULL, &so);
+
+	so_emit(chan, nv50->state.instbuf);
+}
+
 void
 nv50_draw_elements_instanced(struct pipe_context *pipe,
 			     struct pipe_buffer *indexBuffer,
-- 
cgit v1.2.3


From d8a415425e7761a7bed03a0c383cb3839eaf1b4b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 5 Mar 2010 13:25:01 +1000
Subject: nv50: reimplement draw_elements_instanced(), use for draw_elements()
 too

This makes draw_elements()/draw_elements_instanced() do the right thing
for the non-inline elements cases, and not require flush_notify().
---
 src/gallium/drivers/nv50/nv50_vbo.c | 252 +++++++++++++-----------------------
 1 file changed, 93 insertions(+), 159 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 5a9728d7be..54a2fa730a 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -332,104 +332,65 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 	return TRUE;
 }
 
-static INLINE void
-nv50_draw_elements_inline(struct nv50_context *nv50,
-			  void *map, unsigned indexSize,
-			  unsigned start, unsigned count)
-{
-	switch (indexSize) {
-	case 1:
-		nv50_draw_elements_inline_u08(nv50, map, start, count);
-		break;
-	case 2:
-		nv50_draw_elements_inline_u16(nv50, map, start, count);
-		break;
-	case 4:
-		nv50_draw_elements_inline_u32(nv50, map, start, count);
-		break;
-	}
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
-			 unsigned startInstance,
-			 unsigned pos[16], unsigned step[16])
-{
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
-	struct nouveau_bo *bo;
-	struct nouveau_stateobj *so;
-	unsigned i, b, count = 0;
-	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
-	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
-
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
-			continue;
-		++count;
-		b = nv50->vtxelt[i].vertex_buffer_index;
-
-		pos[i] = nv50->vtxelt[i].src_offset +
-			nv50->vtxbuf[b].buffer_offset +
-			startInstance * nv50->vtxbuf[b].stride;
-
-		if (!startInstance) {
-			step[i] = 0;
-			continue;
-		}
-		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
-		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
-	}
-
-	if (count && startInstance) {
-		so_ref (so, &nv50->state.instbuf); /* for flush notify */
-		so_emit(chan, nv50->state.instbuf);
-	}
-	so_ref (NULL, &so);
-
-	return count;
-}
-
 static void
-step_per_instance_arrays(struct nv50_context *nv50,
-			 unsigned pos[16], unsigned step[16])
+nv50_draw_elements_inline(struct pipe_context *pipe,
+			  struct pipe_buffer *indexBuffer, unsigned indexSize,
+			  unsigned mode, unsigned start, unsigned count,
+			  unsigned startInstance, unsigned instanceCount)
 {
+	struct pipe_screen *pscreen = pipe->screen;
+	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
-	struct nouveau_bo *bo;
-	struct nouveau_stateobj *so;
-	unsigned i, b;
-	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	struct instance a[16];
+	unsigned prim = nv50_prim(mode);
+	void *map;
 
-	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+	assert(map);
+	if (!map)
+		return;
 
-	for (i = 0; i < nv50->vtxelt_nr; ++i) {
-		if (!nv50->vtxelt[i].instance_divisor)
-			continue;
-		b = nv50->vtxelt[i].vertex_buffer_index;
+	instance_init(nv50, a, startInstance);
+	if (!nv50_state_validate(nv50, 0))
+		return;
 
-		if (++step[i] == nv50->vtxelt[i].instance_divisor) {
-			step[i] = 0;
-			pos[i] += nv50->vtxbuf[b].stride;
+	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+	OUT_RING  (chan, startInstance);
+	while (instanceCount--) {
+		if (AVAIL_RING(chan) < (7 + 16*3)) {
+			FIRE_RING(chan);
+			if (!nv50_state_validate(nv50, 0)) {
+				assert(0);
+				return;
+			}
 		}
+		instance_step(nv50, a);
 
-		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+		OUT_RING  (chan, prim);
+		switch (indexSize) {
+		case 1:
+			nv50_draw_elements_inline_u08(nv50, map, start, count);
+			break;
+		case 2:
+			nv50_draw_elements_inline_u16(nv50, map, start, count);
+			break;
+		case 4:
+			nv50_draw_elements_inline_u32(nv50, map, start, count);
+			break;
+		default:
+			assert(0);
+			break;
+		}
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+		OUT_RING  (chan, 0);
 
-		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
-		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
+		prim |= (1 << 28);
 	}
 
-	so_ref (so, &nv50->state.instbuf); /* for flush notify */
-	so_ref (NULL, &so);
-
-	so_emit(chan, nv50->state.instbuf);
+	pipe_buffer_unmap(pscreen, indexBuffer);
 }
 
 void
@@ -440,49 +401,62 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 			     unsigned startInstance, unsigned instanceCount)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
+	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_channel *chan = tesla->channel;
-	struct pipe_screen *pscreen = pipe->screen;
-	void *map;
-	unsigned i, nz_divisors;
-	unsigned step[16], pos[16];
-
-	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+	struct instance a[16];
+	unsigned prim = nv50_prim(mode);
 
-	if (!nv50_state_validate(nv50, 0))
+	if (indexSize == 1) {
+		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
+					  mode, start, count, startInstance,
+					  instanceCount);
 		return;
-	chan->flush_notify = nv50_state_flush_notify;
+	}
 
-	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+	instance_init(nv50, a, startInstance);
+	if (!nv50_state_validate(nv50, 13 + 16*3))
+		return;
 
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-	OUT_RING  (chan, nv50_prim(mode));
-
-	nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-	OUT_RING  (chan, 0);
-
-	for (i = 1; i < instanceCount; ++i) {
-		if (nz_divisors) /* any non-zero array divisors ? */
-			step_per_instance_arrays(nv50, pos, step);
+	while (instanceCount--) {
+		if (AVAIL_RING(chan) < (7 + 16*3)) {
+			FIRE_RING(chan);
+			if (!nv50_state_validate(nv50, 10 + 16*3)) {
+				assert(0);
+				return;
+			}
+		}
+		instance_step(nv50, a);
 
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
-
-		nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
+		OUT_RING  (chan, prim);
+		if (indexSize == 4) {
+			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
+			OUT_RING  (chan, count);
+			nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+					       start << 2, count << 2);
+		} else
+		if (indexSize == 2) {
+			unsigned vb_start = (start & ~1);
+			unsigned vb_end = (start + count + 1) & ~1;
+			unsigned dwords = (vb_end - vb_start) >> 1;
+
+			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+			OUT_RING  (chan, ((start & 1) << 31) | count);
+			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
+			OUT_RING  (chan, dwords);
+			nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+					       vb_start << 1, dwords << 2);
+			BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+			OUT_RING  (chan, 0);
+		}
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 		OUT_RING  (chan, 0);
-	}
 
-	chan->flush_notify = NULL;
-
-	so_ref(NULL, &nv50->state.instbuf);
+		prim |= (1 << 28);
+	}
 }
 
 void
@@ -490,48 +464,8 @@ nv50_draw_elements(struct pipe_context *pipe,
 		   struct pipe_buffer *indexBuffer, unsigned indexSize,
 		   unsigned mode, unsigned start, unsigned count)
 {
-	struct nv50_context *nv50 = nv50_context(pipe);
-	struct nouveau_channel *chan = nv50->screen->tesla->channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct pipe_screen *pscreen = pipe->screen;
-	void *map;
-	
-	if (!nv50_state_validate(nv50, 14))
-		return;
-	chan->flush_notify = nv50_state_flush_notify;
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-	OUT_RING  (chan, nv50_prim(mode));
-
-	if (indexSize == 4) {
-		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
-		OUT_RING  (chan, count);
-		nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-				       start << 2, count << 2);
-	} else
-	if (indexSize == 2) {
-		unsigned vb_start = (start & ~1);
-		unsigned vb_end = (start + count + 1) & ~1;
-		unsigned dwords = (vb_end - vb_start) >> 1;
-
-		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-		OUT_RING  (chan, ((start & 1) << 31) | count);
-		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
-		OUT_RING  (chan, dwords);
-		nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
-				       vb_start << 1, dwords << 2);
-		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
-		OUT_RING  (chan, 0);
-	} else {
-		map = pipe_buffer_map(pscreen, indexBuffer,
-				      PIPE_BUFFER_USAGE_CPU_READ);
-		nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-		pipe_buffer_unmap(pscreen, indexBuffer);
-	}
-
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-	OUT_RING  (chan, 0);
-	chan->flush_notify = NULL;
+	nv50_draw_elements_instanced(pipe, indexBuffer, indexSize,
+				     mode, start, count, 0, 1);
 }
 
 static INLINE boolean
-- 
cgit v1.2.3


From 0842829c1813d47e6fdf3de09167d243c029cc90 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 8 Mar 2010 13:27:46 +1000
Subject: nv50: when doing inline indices, split elt lists so they fit into
 pushbuf

---
 src/gallium/drivers/nouveau/nouveau_util.h | 100 +++++++++++++++
 src/gallium/drivers/nv50/nv50_vbo.c        | 187 ++++++++++++++++-------------
 2 files changed, 202 insertions(+), 85 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
index a10114beab..7f16e31c3f 100644
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i)
 	return r;
 }
 
+struct u_split_prim {
+   void *priv; /* opaque pointer passed back to emit() and edge() */
+   void (*emit)(void *priv, unsigned start, unsigned count);
+   void (*edge)(void *priv, boolean enabled);
+
+   unsigned mode;    /* primitive type the split rules are chosen by */
+   unsigned start;   /* first vertex of the whole primitive */
+   unsigned p_start; /* first vertex of the next chunk */
+   unsigned p_end;   /* one past the last vertex */
+   /* unsigned: a 1-bit plain 'int' field may be signed and read back as -1 */
+   unsigned repeat_first:1; /* re-emit 'start' at the head of every later chunk */
+   unsigned close_first:1;  /* emit 'start' once more after the last chunk */
+   unsigned edgeflag_off:1; /* edge(FALSE) was issued; restore on the next call */
+};
+
+/* Reset the split state; priv, emit and edge are not touched here. */
+static INLINE void
+u_split_prim_init(struct u_split_prim *s,
+                  unsigned mode, unsigned start, unsigned count)
+{
+   if (mode == PIPE_PRIM_LINE_LOOP) {
+      s->mode = PIPE_PRIM_LINE_STRIP; /* loop = strip + closing vertex */
+      s->close_first = 1;
+   } else {
+      s->mode = mode;
+      s->close_first = 0;
+   }
+   s->start = start;
+   s->p_start = start;
+   s->p_end = start + count;
+   s->edgeflag_off = 0;
+   s->repeat_first = 0;
+}
+
+/* Emit the next chunk, sized to fit max_verts; TRUE once the last one is out. */
+static INLINE boolean
+u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
+{
+   int repeat = 0;
+
+   if (s->repeat_first) {
+      s->emit(s->priv, s->start, 1);
+      max_verts--;
+      if (s->edgeflag_off) {
+         s->edge(s->priv, TRUE);
+         s->edgeflag_off = FALSE;
+      }
+   }
+
+   /* everything left, plus the closing vertex of a loop, fits: finish up */
+   if ((s->p_end - s->p_start) + s->close_first <= max_verts) {
+      s->emit(s->priv, s->p_start, s->p_end - s->p_start);
+      if (s->close_first)
+         s->emit(s->priv, s->start, 1);
+      return TRUE;
+   }
+
+   switch (s->mode) {
+   case PIPE_PRIM_LINES:
+      max_verts &= ~1;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+      repeat = 1;
+      break;
+   case PIPE_PRIM_POLYGON:
+      max_verts--;
+      s->emit(s->priv, s->p_start, max_verts);
+      s->edge(s->priv, FALSE);
+      s->emit(s->priv, s->p_start + max_verts, 1);
+      s->p_start += max_verts;
+      s->repeat_first = TRUE;
+      s->edgeflag_off = TRUE;
+      return FALSE;
+   case PIPE_PRIM_TRIANGLES:
+      max_verts = max_verts - (max_verts % 3);
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      max_verts = max_verts & ~1; /* even triangle count keeps winding correct */
+      repeat = 2;
+      break;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      s->repeat_first = TRUE;
+      repeat = 1;
+      break;
+   case PIPE_PRIM_QUADS:
+      max_verts &= ~3;
+      break;
+   case PIPE_PRIM_QUAD_STRIP:
+      max_verts &= ~1;
+      repeat = 2;
+      break;
+   default:
+      break;
+   }
+
+   s->emit (s->priv, s->p_start, max_verts);
+   s->p_start += (max_verts - repeat);
+   return FALSE;
+}
+
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 54a2fa730a..8f3c1aaf46 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -25,6 +25,7 @@
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 
+#include "nouveau/nouveau_util.h"
 #include "nv50_context.h"
 
 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
@@ -251,85 +252,83 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
-			      unsigned start, unsigned count)
-{
-	struct nouveau_channel *chan = nv50->screen->tesla->channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
+struct inline_ctx {
+	struct nv50_context *nv50;
+	void *map;
+};
 
-	map += start;
+static void
+inline_elt08(void *priv, unsigned start, unsigned count)
+{
+	struct inline_ctx *ctx = priv;
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	uint8_t *map = (uint8_t *)ctx->map + start;
 
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
 		map++;
-		count--;
+		count &= ~1;
 	}
 
-	while (count) {
-		unsigned nr = count > 2046 ? 2046 : count;
-		int i;
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-		for (i = 0; i < nr; i += 2)
-			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+	count >>= 1;
+	if (!count)
+		return;
 
-		count -= nr;
-		map += nr;
+	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+	while (count--) {
+		OUT_RING(chan, (map[1] << 16) | map[0]);
+		map += 2;
 	}
-	return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
-			      unsigned start, unsigned count)
+static void
+inline_elt16(void *priv, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv50->screen->tesla->channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
-
-	map += start;
+	struct inline_ctx *ctx = priv;
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
+	uint16_t *map = (uint16_t *)ctx->map + start;
 
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
+		count &= ~1;
 		map++;
-		count--;
 	}
 
-	while (count) {
-		unsigned nr = count > 2046 ? 2046 : count;
-		int i;
-
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
-		for (i = 0; i < nr; i += 2)
-			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
+	count >>= 1;
+	if (!count)
+		return;
 
-		count -= nr;
-		map += nr;
+	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+	while (count--) {
+		OUT_RING(chan, (map[1] << 16) | map[0]);
+		map += 2;
 	}
-	return TRUE;
 }
 
-static INLINE boolean
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
-			      unsigned start, unsigned count)
+static void
+inline_elt32(void *priv, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv50->screen->tesla->channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct inline_ctx *ctx = priv;
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
 
-	map += start;
-
-	while (count) {
-		unsigned nr = count > 2047 ? 2047 : count;
+	BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
+	OUT_RINGp    (chan, (uint32_t *)ctx->map + start, count);
+}
 
-		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
-		OUT_RINGp (chan, map, nr);
+static void
+inline_edgeflag(void *priv, boolean enabled)
+{
+	struct inline_ctx *ctx = priv;
+	struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+	struct nouveau_channel *chan = tesla->channel;
 
-		count -= nr;
-		map += nr;
-	}
-	return TRUE;
+	BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, enabled ? 1 : 0);
 }
 
 static void
@@ -343,51 +342,68 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
 	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct instance a[16];
-	unsigned prim = nv50_prim(mode);
-	void *map;
-
-	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
-	assert(map);
-	if (!map)
+	struct inline_ctx ctx;
+	struct u_split_prim s;
+	boolean nzi = FALSE;
+	unsigned overhead;
+
+	overhead = 16*3; /* potential instance adjustments */
+	overhead += 4; /* Begin()/End() */
+	overhead += 4; /* potential edgeflag disable/reenable */
+	overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
+
+	s.priv = &ctx;
+	if (indexSize == 1)
+		s.emit = inline_elt08;
+	else
+	if (indexSize == 2)
+		s.emit = inline_elt16;
+	else
+		s.emit = inline_elt32;
+	s.edge = inline_edgeflag;
+
+	ctx.nv50 = nv50;
+	ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+	assert(ctx.map);
+	if (!ctx.map)
 		return;
 
 	instance_init(nv50, a, startInstance);
-	if (!nv50_state_validate(nv50, 0))
+	if (!nv50_state_validate(nv50, overhead + 6 + 3))
 		return;
 
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
 	while (instanceCount--) {
-		if (AVAIL_RING(chan) < (7 + 16*3)) {
-			FIRE_RING(chan);
-			if (!nv50_state_validate(nv50, 0)) {
-				assert(0);
-				return;
+		unsigned max_verts;
+		boolean done;
+
+		u_split_prim_init(&s, mode, start, count);
+		do {
+			if (AVAIL_RING(chan) < (overhead + 6)) {
+				FIRE_RING(chan);
+				if (!nv50_state_validate(nv50, (overhead + 6))) {
+					assert(0);
+					return;
+				}
 			}
-		}
-		instance_step(nv50, a);
 
-		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
-		OUT_RING  (chan, prim);
-		switch (indexSize) {
-		case 1:
-			nv50_draw_elements_inline_u08(nv50, map, start, count);
-			break;
-		case 2:
-			nv50_draw_elements_inline_u16(nv50, map, start, count);
-			break;
-		case 4:
-			nv50_draw_elements_inline_u32(nv50, map, start, count);
-			break;
-		default:
-			assert(0);
-			break;
-		}
-		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
-		OUT_RING  (chan, 0);
+			max_verts = AVAIL_RING(chan) - overhead;
+			if (max_verts > 2047)
+				max_verts = 2047;
+			if (indexSize != 4)
+				max_verts <<= 1;
+			instance_step(nv50, a);
+
+			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+			OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
+			done = u_split_prim_next(&s, max_verts);
+			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+			OUT_RING  (chan, 0);
+		} while (!done);
 
-		prim |= (1 << 28);
+		nzi = TRUE;
 	}
 
 	pipe_buffer_unmap(pscreen, indexBuffer);
@@ -406,7 +422,8 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 	struct instance a[16];
 	unsigned prim = nv50_prim(mode);
 
-	if (indexSize == 1) {
+	if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) ||
+	    indexSize == 1) {
 		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
 					  mode, start, count, startInstance,
 					  instanceCount);
-- 
cgit v1.2.3


From 160bda24ed3b8f74d58cfcf55349c6d9e92fb442 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 8 Mar 2010 17:06:38 +1000
Subject: nv50: re-add immediate mode vertex submission paths

---
 src/gallium/drivers/nv50/Makefile       |   3 +-
 src/gallium/drivers/nv50/nv50_context.h |   9 +
 src/gallium/drivers/nv50/nv50_push.c    | 357 ++++++++++++++++++++++++++++++++
 src/gallium/drivers/nv50/nv50_vbo.c     |  38 +++-
 4 files changed, 399 insertions(+), 8 deletions(-)
 create mode 100644 src/gallium/drivers/nv50/nv50_push.c

diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index 612aea28a3..5d622e1c13 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -16,6 +16,7 @@ C_SOURCES = \
 	nv50_surface.c \
 	nv50_tex.c \
 	nv50_transfer.c \
-	nv50_vbo.c
+	nv50_vbo.c \
+	nv50_push.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 15f33fa4f4..35abfba93b 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -163,6 +163,8 @@ struct nv50_context {
 	unsigned sampler_nr[PIPE_SHADER_TYPES];
 	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
+
+	unsigned vbo_fifo;
 };
 
 static INLINE struct nv50_context *
@@ -207,6 +209,13 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
 extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
 extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
 
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+			     unsigned idxsize, unsigned mode, unsigned start,
+			     unsigned count, unsigned i_start,
+			     unsigned i_count);
+
 /* nv50_clear.c */
 extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 		       const float *rgba, double depth, unsigned stencil);
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644
index 0000000000..b615f4e054
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -0,0 +1,357 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+   case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+   case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+   case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+   case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+   case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+   case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+   case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+   case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+   case PIPE_PRIM_LINES_ADJACENCY:
+      return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+   default:
+      break;
+   }
+
+   NOUVEAU_ERR("invalid primitive type %d\n", mode);
+   return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
+struct push_context {
+   struct nv50_context *nv50;
+
+   unsigned vtx_size; /* OUT_RING words of attribute data per vertex */
+
+   void *idxbuf; /* index data, NULL for non-indexed draws */
+   unsigned idxsize; /* bytes per index; only set when idxbuf is */
+
+   float edgeflag; /* last edge flag value emit_vertex acted on */
+   int edgeflag_attr; /* attr[] slot holding the edge flag; >= 16: none */
+
+   struct {
+      void *map; /* address of this attribute for vertex 0 */
+      unsigned stride; /* bytes between consecutive vertices */
+      unsigned divisor; /* instance divisor; 0: not stepped per instance */
+      unsigned step; /* instances counted towards the next map advance */
+      void (*push)(struct nouveau_channel *, void *); /* emits one value */
+   } attr[16];
+   unsigned attr_nr; /* entries of attr[] in use */
+};
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+   /* Push one 32-bit value read from data. */
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+   /* Push two consecutive 32-bit values read from data. */
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+   /* Push three consecutive 32-bit values read from data. */
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+   uint32_t *v = data;
+   /* Push four consecutive 32-bit values read from data. */
+   OUT_RING(chan, v[0]);
+   OUT_RING(chan, v[1]);
+   OUT_RING(chan, v[2]);
+   OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+   /* Push one 16-bit value read from data as a single OUT_RING. */
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+   /* Three 16-bit values: the first two share a word, the third is alone. */
+   OUT_RING(chan, (v[1] << 16) | v[0]); /* v[0]: bits 0-15, v[1]: 16-31 */
+   OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+   /* Push one 8-bit value read from data as a single OUT_RING. */
+   OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+   uint8_t *v = data;
+   /* Three bytes in one word: v[0] bits 0-7, v[1] bits 8-15, v[2] 16-23. */
+   OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   int i;
+   /* Push the attributes of vertex n, preceded by an edge flag update. */
+   if (ctx->edgeflag_attr < 16) { /* an attr[] slot carries the edge flag */
+      float *edgeflag = ctx->attr[ctx->edgeflag_attr].map +
+                        ctx->attr[ctx->edgeflag_attr].stride * n;
+      /* Send EDGEFLAG_ENABLE only when the value differs from the last. */
+      if (*edgeflag != ctx->edgeflag) {
+         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+         OUT_RING  (chan, *edgeflag ? 1 : 0);
+         ctx->edgeflag = *edgeflag;
+      }
+   }
+   /* One VERTEX_DATA packet of vtx_size words, filled slot by slot. */
+   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+   for (i = 0; i < ctx->attr_nr; i++)
+      ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n);
+}
+
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+   struct push_context *ctx = priv;
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   /* Write 1 or 0 to EDGEFLAG_ENABLE; priv is the push_context. */
+   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+   OUT_RING  (chan, enabled ? 1 : 0);
+}
+
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint8_t *idxbuf = ctx->idxbuf;
+   /* Emit count vertices named by the 8-bit indices from position start. */
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint16_t *idxbuf = ctx->idxbuf;
+   /* Emit count vertices named by the 16-bit indices from position start. */
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *ctx = priv;
+   uint32_t *idxbuf = ctx->idxbuf;
+   /* Emit count vertices named by the 32-bit indices from position start. */
+   while (count--)
+      emit_vertex(ctx, idxbuf[start++]);
+}
+
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+   while (count--) /* no index buffer: vertices start, start + 1, ... */
+      emit_vertex(priv, start++); /* priv is the push_context */
+}
+
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+                             struct pipe_buffer *idxbuf, unsigned idxsize,
+                             unsigned mode, unsigned start, unsigned count,
+                             unsigned i_start, unsigned i_count)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_grobj *tesla = nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   struct push_context ctx;
+   const unsigned p_overhead = 4 + /* begin/end */
+                               4; /* potential edgeflag enable/disable */
+   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+                               2; /* potential edgeflag modification */
+   struct u_split_prim s;
+   unsigned vtx_size;
+   boolean nzi = FALSE; /* TRUE after the first instance; sets bit 28 below */
+   int i;
+   /* Draw by writing every vertex into the channel, once per instance. */
+   ctx.nv50 = nv50;
+   ctx.attr_nr = 0;
+   ctx.idxbuf = NULL;
+   ctx.vtx_size = 0;
+   ctx.edgeflag = 0.5f; /* presumably so the first vertex always differs */
+   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+   /* map vertex buffers, determine vertex size */
+   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+      struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+      unsigned size, nr_components, n;
+
+      if (!(nv50->vbo_fifo & (1 << i))) /* bit i clear: element not pushed */
+         continue;
+      n = ctx.attr_nr++;
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset;
+      nouveau_bo_unmap(bo); /* NOTE(review): map is still read after this */
+
+      ctx.attr[n].stride = vb->stride;
+      ctx.attr[n].divisor = ve->instance_divisor;
+      if (ctx.attr[n].divisor) { /* start from the first requested instance */
+         ctx.attr[n].step = i_start % ve->instance_divisor;
+         ctx.attr[n].map += i_start * vb->stride;
+      }
+      /* Pick the emitter from the width of component 0 and the count. */
+      size = util_format_get_component_bits(ve->src_format,
+                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
+      nr_components = util_format_get_nr_components(ve->src_format);
+      switch (size) {
+      case 8:
+         switch (nr_components) { /* NOTE(review): no default, push unset */
+         case 1: ctx.attr[n].push = emit_b08_1; break;
+         case 2: ctx.attr[n].push = emit_b16_1; break;
+         case 3: ctx.attr[n].push = emit_b08_3; break;
+         case 4: ctx.attr[n].push = emit_b32_1; break;
+         }
+         ctx.vtx_size++; /* up to four bytes always fit in one word */
+         break;
+      case 16:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b16_1; break;
+         case 2: ctx.attr[n].push = emit_b32_1; break;
+         case 3: ctx.attr[n].push = emit_b16_3; break;
+         case 4: ctx.attr[n].push = emit_b32_2; break;
+         }
+         ctx.vtx_size += (nr_components + 1) >> 1; /* two values per word */
+         break;
+      case 32:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b32_1; break;
+         case 2: ctx.attr[n].push = emit_b32_2; break;
+         case 3: ctx.attr[n].push = emit_b32_3; break;
+         case 4: ctx.attr[n].push = emit_b32_4; break;
+         }
+         ctx.vtx_size += nr_components; /* one word per component */
+         break;
+      default:
+         assert(0);
+         return;
+      }
+   }
+   vtx_size = ctx.vtx_size + v_overhead; /* worst-case words per vertex */
+
+   /* map index buffer, if present */
+   if (idxbuf) {
+      struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.idxbuf = bo->map;
+      ctx.idxsize = idxsize;
+      nouveau_bo_unmap(bo); /* NOTE(review): ctx.idxbuf is read after this */
+   }
+   /* Callbacks u_split_prim_next uses to emit vertices and edge flags. */
+   s.priv = &ctx;
+   s.edge = emit_edgeflag;
+   if (idxbuf) {
+      if (idxsize == 1)
+         s.emit = emit_elt08;
+      else
+      if (idxsize == 2)
+         s.emit = emit_elt16;
+      else
+         s.emit = emit_elt32; /* any other idxsize is read as 32-bit */
+   } else
+      s.emit = emit_verts;
+
+   /* per-instance loop */
+   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+   OUT_RING  (chan, i_start);
+   while (i_count--) {
+      unsigned max_verts;
+      boolean done;
+      /* Move attributes with a divisor on by one element when step wraps. */
+      for (i = 0; i < ctx.attr_nr; i++) {
+         if (!ctx.attr[i].divisor ||
+              ctx.attr[i].divisor != ++ctx.attr[i].step)
+            continue;
+         ctx.attr[i].step = 0;
+         ctx.attr[i].map += ctx.attr[i].stride;
+      }
+      /* Emit the primitive in chunks sized to the free ring space. */
+      u_split_prim_init(&s, mode, start, count);
+      do {
+         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { /* < 6 verts */
+            FIRE_RING(chan);
+            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+               assert(0);
+               return;
+            }
+         }
+         /* Whole vertices that fit once the per-chunk overhead is set aside. */
+         max_verts  = AVAIL_RING(chan);
+         max_verts -= p_overhead;
+         max_verts /= vtx_size;
+
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+         done = u_split_prim_next(&s, max_verts);
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+         OUT_RING  (chan, 0);
+      } while (!done);
+
+      nzi = TRUE;
+   }
+}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 8f3c1aaf46..9d49ad6db2 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -220,6 +220,13 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
 	if (!nv50_state_validate(nv50, 10 + 16*3))
 		return;
 
+	if (nv50->vbo_fifo) {
+		nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+					     count, startInstance,
+					     instanceCount);
+		return;
+	}
+
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
@@ -422,18 +429,23 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
 	struct instance a[16];
 	unsigned prim = nv50_prim(mode);
 
-	if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) ||
-	    indexSize == 1) {
+	instance_init(nv50, a, startInstance);
+	if (!nv50_state_validate(nv50, 13 + 16*3))
+		return;
+
+	if (nv50->vbo_fifo) {
+		nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+					     mode, start, count, startInstance,
+					     instanceCount);
+		return;
+	} else
+	if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) {
 		nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
 					  mode, start, count, startInstance,
 					  instanceCount);
 		return;
 	}
 
-	instance_init(nv50, a, startInstance);
-	if (!nv50_state_validate(nv50, 13 + 16*3))
-		return;
-
 	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
 	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
 	OUT_RING  (chan, startInstance);
@@ -570,7 +582,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	if (nv50->vtxbuf_nr == 0)
 		return NULL;
 
-	assert(!NV50_USING_LOATHED_EDGEFLAG(nv50));
+	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
+		nv50->vbo_fifo = 0xffff;
+	nv50->vbo_fifo = 0xffff;
 
 	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
@@ -590,6 +604,16 @@ nv50_vbo_validate(struct nv50_context *nv50)
 		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
 			so_data(vtxfmt, hw | (1 << 4));
 
+			so_method(vtxbuf, tesla,
+				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+			so_data  (vtxbuf, 0);
+
+			nv50->vbo_fifo &= ~(1 << i);
+			continue;
+		}
+
+		if (nv50->vbo_fifo) {
+			so_data  (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
 			so_method(vtxbuf, tesla,
 				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
 			so_data  (vtxbuf, 0);
-- 
cgit v1.2.3


From 4796986c9874134e47b00ae2280c3d7fc65111df Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Mar 2010 14:04:14 +1000
Subject: nv50: add option to force immediate-mode submission, disable by
 default

---
 src/gallium/drivers/nv50/nv50_screen.c | 7 +++++--
 src/gallium/drivers/nv50/nv50_screen.h | 2 ++
 src/gallium/drivers/nv50/nv50_vbo.c    | 9 +++++++--
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 114ae9b386..7e2e8aa336 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 static int
 nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 {
+	struct nv50_screen *screen = nv50_screen(pscreen);
+
 	switch (param) {
 	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
 		return 32;
@@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
 		return 1;
 	case NOUVEAU_CAP_HW_VTXBUF:
-		return 1;
+		return screen->force_push ? 0 : 1;
 	case NOUVEAU_CAP_HW_IDXBUF:
-		return 1;
+		return screen->force_push ? 0 : 1;
 	case PIPE_CAP_INDEP_BLEND_ENABLE:
 		return 1;
 	case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -493,6 +495,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_ref (NULL, &so);
 	nouveau_pushbuf_flush(chan, 0);
 
+	screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
 	return pscreen;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 2687b72127..d1bc80cb9e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -28,6 +28,8 @@ struct nv50_screen {
 	struct nouveau_bo *tsc;
 
 	struct nouveau_stateobj *static_init;
+
+	boolean force_push;
 };
 
 static INLINE struct nv50_screen *
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 9d49ad6db2..7eedd49271 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -582,9 +582,14 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	if (nv50->vtxbuf_nr == 0)
 		return NULL;
 
-	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
+	if (nv50->screen->force_push || NV50_USING_LOATHED_EDGEFLAG(nv50))
 		nv50->vbo_fifo = 0xffff;
-	nv50->vbo_fifo = 0xffff;
+
+	for (i = 0; i < nv50->vtxbuf_nr; i++) {
+		if (nv50->vtxbuf[i].stride &&
+		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
+			nv50->vbo_fifo = 0xffff;
+	}
 
 	n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
 
-- 
cgit v1.2.3


From 3f93fa601097ded6993deecb90225242b20307e0 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Mar 2010 14:52:30 +1000
Subject: nv50: move nv50_prim() into a header file rather than duplicating

---
 src/gallium/drivers/nv50/nv50_context.h | 31 +++++++++++++++++++++++++++++++
 src/gallium/drivers/nv50/nv50_push.c    | 31 -------------------------------
 src/gallium/drivers/nv50/nv50_vbo.c     | 31 -------------------------------
 3 files changed, 31 insertions(+), 62 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 35abfba93b..f65b328a56 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -259,4 +259,35 @@ nv50_upload_sifc(struct nv50_context *nv50,
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, void *priv);
 
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+	switch (mode) { /* PIPE_PRIM_* -> NV50TCL_VERTEX_BEGIN_* */
+	case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
+	case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
+	case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+	case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+	case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
+	case PIPE_PRIM_TRIANGLE_STRIP:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+	case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+	case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
+	case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+	case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
+	case PIPE_PRIM_LINES_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+	case PIPE_PRIM_TRIANGLES_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+	default:
+		break;
+	}
+	/* Unrecognised mode: report it through NOUVEAU_ERR, fall back to points. */
+	NOUVEAU_ERR("invalid primitive type %d\n", mode);
+	return NV50TCL_VERTEX_BEGIN_POINTS;
+}
+
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index b615f4e054..96a1f32d30 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -6,37 +6,6 @@
 #include "nouveau/nouveau_util.h"
 #include "nv50_context.h"
 
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
-   switch (mode) {
-   case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
-   case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
-   case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
-   case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
-   case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
-   case PIPE_PRIM_TRIANGLE_STRIP:
-      return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
-   case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
-   case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
-   case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
-   case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
-   case PIPE_PRIM_LINES_ADJACENCY:
-      return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
-   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-      return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
-   case PIPE_PRIM_TRIANGLES_ADJACENCY:
-      return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
-   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-      return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
-   default:
-      break;
-   }
-
-   NOUVEAU_ERR("invalid primitive type %d\n", mode);
-   return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
 struct push_context {
    struct nv50_context *nv50;
 
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 7eedd49271..2b06b81056 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -30,37 +30,6 @@
 
 #define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
 
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
-	switch (mode) {
-	case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
-	case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
-	case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
-	case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
-	case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
-	case PIPE_PRIM_TRIANGLE_STRIP:
-		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
-	case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
-	case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
-	case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
-	case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
-	case PIPE_PRIM_LINES_ADJACENCY:
-		return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
-	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-		return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
-	case PIPE_PRIM_TRIANGLES_ADJACENCY:
-		return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
-	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
-	default:
-		break;
-	}
-
-	NOUVEAU_ERR("invalid primitive type %d\n", mode);
-	return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
 static INLINE uint32_t
 nv50_vbo_type_to_hw(enum pipe_format format)
 {
-- 
cgit v1.2.3


From 9b233ce7de7923feb4b8ef4e1994baa4f13daeef Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Mar 2010 14:56:46 +1000
Subject: nv50: remove unnecessary macro

---
 src/gallium/drivers/nv50/nv50_vbo.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 2b06b81056..6b9c1ee231 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -28,8 +28,6 @@
 #include "nouveau/nouveau_util.h"
 #include "nv50_context.h"
 
-#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
-
 static INLINE uint32_t
 nv50_vbo_type_to_hw(enum pipe_format format)
 {
@@ -551,7 +549,8 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	if (nv50->vtxbuf_nr == 0)
 		return NULL;
 
-	if (nv50->screen->force_push || NV50_USING_LOATHED_EDGEFLAG(nv50))
+	if (nv50->screen->force_push ||
+	    nv50->vertprog->cfg.edgeflag_in < 16)
 		nv50->vbo_fifo = 0xffff;
 
 	for (i = 0; i < nv50->vtxbuf_nr; i++) {
-- 
cgit v1.2.3


From 7b7fcb08542ddd63ed6ef4c6304aade3684db948 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Mar 2010 15:29:12 +1000
Subject: nv50: remove nv50_context.state.instbuf

---
 src/gallium/drivers/nv50/nv50_context.h        | 1 -
 src/gallium/drivers/nv50/nv50_state_validate.c | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index f65b328a56..6865686690 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -129,7 +129,6 @@ struct nv50_state {
 	unsigned miptree_nr[PIPE_SHADER_TYPES];
 	struct nouveau_stateobj *vtxbuf;
 	struct nouveau_stateobj *vtxattr;
-	struct nouveau_stateobj *instbuf;
 	unsigned vtxelt_nr;
 };
 
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 2aaee31c0e..61be3a45fd 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -382,11 +382,6 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
 	so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
 	so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
 	so_emit_reloc_markers(chan, nv50->screen->static_init);
-
-#if 0
-	if (nv50->state.instbuf)
-		so_emit_reloc_markers(chan, nv50->state.instbuf);
-#endif
 }
 
 boolean
-- 
cgit v1.2.3


From 139062946df4fba62a1e411073b61d4b0eeb034c Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Mar 2010 15:30:08 +1000
Subject: nv50: inline nv50_state_flush_notify()

It's (rightly) not called from anywhere else now
---
 src/gallium/drivers/nv50/nv50_context.h        |  1 -
 src/gallium/drivers/nv50/nv50_state_validate.c | 21 ++++++---------------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 6865686690..8793c2aac5 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -235,7 +235,6 @@ extern void nv50_program_destroy(struct nv50_context *nv50,
 
 /* nv50_state_validate.c */
 extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
-extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
 			      struct nouveau_stateobj *so,
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 61be3a45fd..2c8e7ca798 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -370,20 +370,6 @@ struct state_validate {
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
-{
-	struct nv50_context *nv50 = chan->user_private;
-
-	nv50_tex_relocs(nv50);
-
-	so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
-	so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
-	so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
-	so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
-	so_emit_reloc_markers(chan, nv50->screen->static_init);
-}
-
 boolean
 nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
 {
@@ -446,7 +432,12 @@ nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
 	 * this the kernel is given no clue that the buffer is being used
 	 * still.  This can cause all sorts of fun issues.
 	 */
-	nv50_state_flush_notify(chan);
+	nv50_tex_relocs(nv50);
+	so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+	so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+	so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+	so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+	so_emit_reloc_markers(chan, nv50->screen->static_init);
 
 	/* No idea.. */
 	BEGIN_RING(chan, tesla, 0x142c, 1);
-- 
cgit v1.2.3


From ed7f73e161b93b4a83bb6ad6b6aa6cfcb65dc4b0 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 10 Mar 2010 15:22:53 +1000
Subject: nouveau: translate PIPE_BUFFER_USAGE_UNSYNCHRONIZED

---
 src/gallium/drivers/nouveau/nouveau_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index f7d10a591f..b1ad686022 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -124,7 +124,7 @@ nouveau_screen_map_flags(unsigned pipe)
 	if (pipe & PIPE_BUFFER_USAGE_DONTBLOCK)
 		flags |= NOUVEAU_BO_NOWAIT;
 	else
-	if (pipe & 0 /*PIPE_BUFFER_USAGE_UNSYNCHRONIZED*/)
+	if (pipe & PIPE_BUFFER_USAGE_UNSYNCHRONIZED)
 		flags |= NOUVEAU_BO_NOSYNC;
 
 	return flags;
-- 
cgit v1.2.3


From eeaa0861bfc98a06ceec269801271b7453c4fcbd Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 10 Mar 2010 07:23:29 +0000
Subject: llvmpipe: Cope with null Vertex element cso.

CSO can often be null.

For example:
1. at initialization
2. using a util module (u_blit) right after initialization (it will push
   state and pop the previous null state)
3. at shutdown time (state shouldn't be bound when being destroyed)

Glean was hitting 2.
---
 src/gallium/drivers/llvmpipe/lp_state_vertex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index 2ddd110a5f..f6427aa908 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -61,7 +61,8 @@ llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
 
    llvmpipe->dirty |= LP_NEW_VERTEX;
 
-   draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
+   if (velems)
+      draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
 }
 
 void
-- 
cgit v1.2.3