i965g: more work on compiling, particularly the brw_draw files

author: Keith Whitwell <keithw@vmware.com> 2009-10-25 00:02:16 +0100
committer: Keith Whitwell <keithw@vmware.com> 2009-10-25 00:02:16 +0100
commit: 4dd2f6640b70e2313f8771f7588aa49a861153aa (patch)
tree: d2cbc2b694839380de8e31a8a72f8c3d8f7c5eb1 /src/gallium/drivers/i965
parent: 4f7931bb3554cb1839adc2044e3abe6d4af8b0b5 (diff)
29 files changed, 689 insertions, 403 deletions
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 40c8364824..40e8aa8786 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -61,7 +61,7 @@ C_SOURCES = \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
 	brw_bo.c \
-	intel_batchbuffer.c \
+	brw_batchbuffer.c \
 	intel_tex_layout.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
new file mode 100644
index 0000000000..8bcac76ede
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -0,0 +1,198 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "brw_batchbuffer.h"
+#include "brw_decode.h"
+#include "brw_reg.h"
+#include "brw_winsys.h"
+
+
+void
+brw_batchbuffer_reset(struct brw_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;
+
+   if (batch->buf != NULL) {
+      brw->sws->bo_unreference(batch->buf);
+      batch->buf = NULL;
+   }
+
+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
+   batch->buf = batch->sws->bo_alloc(batch->sws,
+				     BRW_BUFFER_TYPE_BATCH,
+				     intel->maxBatchSize, 4096);
+   if (batch->buffer)
+      batch->map = batch->buffer;
+   else {
+      batch->sws->bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;
+   }
+   batch->size = intel->maxBatchSize;
+   batch->ptr = batch->map;
+   batch->dirty_state = ~0;
+   batch->cliprect_mode = IGNORE_CLIPRECTS;
+}
+
+struct brw_batchbuffer *
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
+{
+   struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
+
+   batch->sws = sws;
+   brw_batchbuffer_reset(batch);
+
+   return batch;
+}
+
+void
+brw_batchbuffer_free(struct brw_batchbuffer *batch)
+{
+   if (batch->map) {
+      dri_bo_unmap(batch->buf);
+      batch->map = NULL;
+   }
+
+   brw->sws->bo_unreference(batch->buf);
+   batch->buf = NULL;
+   FREE(batch);
+}
+
+
+void
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
+			 int line)
+{
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;
+
+   if (used == 0)
+      return;
+
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
+   }
+
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
+   }
+
+
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+	      used);
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (intel->always_flush_cache || !intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
+   }
+
+   /* Round batchbuffer usage to 2 DWORDs. */
+
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
+   }
+
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
+   batch->sws->bo_unmap(batch->buf);
+
+   batch->map = NULL;
+   batch->ptr = NULL;
+      
+   batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
+      
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      dri_bo_map(batch->buf, GL_FALSE);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+		   brw->brw_screen->pci_id);
+      dri_bo_unmap(batch->buf);
+   }
+
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "waiting for idle\n");
+      dri_bo_map(batch->buf, GL_TRUE);
+      dri_bo_unmap(batch->buf);
+   }
+
+   /* Reset the buffer:
+    */
+   brw_batchbuffer_reset(batch);
+}
+
+
+/*  This is the only way buffers get added to the validate list.
+ */
+GLboolean
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                             struct brw_winsys_buffer *buffer,
+                             uint32_t read_domains, uint32_t write_domain,
+			     uint32_t delta)
+{
+   int ret;
+
+   if (batch->ptr - batch->map > batch->buf->size)
+      _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+		    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+
+   ret = batch->sws->bo_emit_reloc(batch->buf,
+				   read_domains,
+				   write_domain,
+				   delta, 
+				   batch->ptr - batch->map,
+				   buffer);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   brw_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+   return GL_TRUE;
+}
+
+void
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                       const void *data, GLuint bytes,
+		       enum cliprect_mode cliprect_mode)
+{
+   assert((bytes & 3) == 0);
+   brw_batchbuffer_require_space(batch, bytes);
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index b8492882e1..25bb9cefca 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -33,18 +33,16 @@ void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
  * Consider it a convenience function wrapping multple
  * intel_buffer_dword() calls.
  */
-void brw_batchbuffer_data(struct brw_batchbuffer *batch,
+int brw_batchbuffer_data(struct brw_batchbuffer *batch,
                             const void *data, GLuint bytes,
 			    enum cliprect_mode cliprect_mode);
 
-void brw_batchbuffer_release_space(struct brw_batchbuffer *batch,
-                                     GLuint bytes);
 
-GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
-                                       struct brw_winsys_buffer *buffer,
-				       uint32_t read_domains,
-				       uint32_t write_domain,
-				       uint32_t offset);
+int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+			       struct brw_winsys_buffer *buffer,
+			       uint32_t read_domains,
+			       uint32_t write_domain,
+			       uint32_t offset);
 
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 76759304eb..ca10bc73f6 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -57,7 +57,7 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp,
    svp->far = 1;
 }
 
-static void prepare_cc_vp( struct brw_context *brw )
+static int prepare_cc_vp( struct brw_context *brw )
 {
    struct brw_cc_viewport ccv;
    struct sane_viewport svp;
@@ -72,6 +72,8 @@ static void prepare_cc_vp( struct brw_context *brw )
 
    brw->sws->bo_unreference(brw->cc.vp_bo);
    brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -158,7 +160,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    return bo;
 }
 
-static void prepare_cc_unit( struct brw_context *brw )
+static int prepare_cc_unit( struct brw_context *brw )
 {
    struct brw_cc_unit_key key;
 
@@ -172,6 +174,8 @@ static void prepare_cc_unit( struct brw_context *brw )
 
    if (brw->cc.state_bo == NULL)
       brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+   
+   return 0;
 }
 
 const struct brw_tracked_state brw_cc_unit = {
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 622d9dba96..1a52fa771b 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -146,7 +146,7 @@ static void compile_clip_prog( struct brw_context *brw,
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_clip_prog(struct brw_context *brw)
+static int upload_clip_prog(struct brw_context *brw)
 {
    struct brw_clip_prog_key key;
 
@@ -173,6 +173,8 @@ static void upload_clip_prog(struct brw_context *brw)
 					&brw->clip.prog_data);
    if (brw->clip.prog_bo == NULL)
       compile_clip_prog( brw, &key );
+
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 25b8c6372f..bf4e6f5103 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -159,7 +159,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    return bo;
 }
 
-static void upload_clip_unit( struct brw_context *brw )
+static int upload_clip_unit( struct brw_context *brw )
 {
    struct brw_clip_unit_key key;
 
@@ -173,6 +173,8 @@ static void upload_clip_unit( struct brw_context *brw )
    if (brw->clip.state_bo == NULL) {
       brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
    }
+   
+   return 0;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index e9605bafe6..e10b7d8bf5 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -105,7 +105,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    brw->state.dirty.mesa = ~0;
    brw->state.dirty.brw = ~0;
 
-   brw->emit_state_always = 0;
+   brw->flags.always_emit_state = 0;
 
    make_empty_list(&brw->query.active_head);
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index dd782fdba9..7ead641811 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -182,6 +182,8 @@ struct brw_fragment_shader {
 #define PIPE_NEW_FRAGMENT_CONSTANTS     0x2
 #define PIPE_NEW_VERTEX_CONSTANTS       0x2
 #define PIPE_NEW_CLIP                   0x2
+#define PIPE_NEW_INDEX_BUFFER           0x2
+#define PIPE_NEW_INDEX_RANGE            0x2
 
 
 #define BRW_NEW_URB_FENCE               0x1
@@ -387,8 +389,8 @@ struct brw_cache {
  */
 struct brw_tracked_state {
    struct brw_state_flags dirty;
-   void (*prepare)( struct brw_context *brw );
-   void (*emit)( struct brw_context *brw );
+   int (*prepare)( struct brw_context *brw );
+   int (*emit)( struct brw_context *brw );
 };
 
 /* Flags for brw->state.cache.
@@ -465,9 +467,7 @@ struct brw_context
    GLuint primitive;
    GLuint reduced_primitive;
 
-   GLboolean emit_state_always;
-
-   /* Active vertex program: 
+   /* Active state from the state tracker: 
     */
    struct {
       const struct brw_vertex_shader *vertex_shader;
@@ -475,11 +475,31 @@ struct brw_context
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_alpha_state *zstencil;
+
+      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_elements;
+      unsigned num_vertex_buffers;
+
       struct pipe_framebuffer_state fb;
       struct pipe_viewport_state vp;
       struct pipe_clip_state ucp;
       struct pipe_buffer *vertex_constants;
       struct pipe_buffer *fragment_constants;
+
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+       */
+      struct pipe_buffer *index_buffer;
+      unsigned index_size;
+
+      /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
+       */
+      unsigned min_index;
+      unsigned max_index;
+
    } curr;
 
    struct {
@@ -504,30 +524,26 @@ struct brw_context
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_element;
-      unsigned num_vertex_buffer;
-
       struct u_upload_mgr *upload_vertex;
       struct u_upload_mgr *upload_index;
       
-
-      /* Summary of size and varying of active arrays, so we can check
-       * for changes to this state:
+      /* Information on uploaded vertex buffers:
        */
-      struct brw_vertex_info info;
-      unsigned int min_index, max_index;
+      struct {
+	 unsigned stride;	/* in bytes between successive vertices */
+	 unsigned offset;	/* in bytes, of first vertex in bo */
+	 unsigned vertex_count;	/* count of valid vertices which may be accessed */
+	 struct brw_winsys_buffer *bo;
+      } vb[PIPE_MAX_ATTRIBS];
+
+      struct {
+      } ve[PIPE_MAX_ATTRIBS];
+
+      unsigned nr_vb;		/* currently the same as curr.num_vertex_buffers */
+      unsigned nr_ve;		/* currently the same as curr.num_vertex_elements */
    } vb;
 
    struct {
-      /**
-       * Index buffer for this draw_prims call.
-       *
-       * Updates are signaled by BRW_NEW_INDICES.
-       */
-      const struct _mesa_index_buffer *ib;
-
       /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
       struct brw_winsys_buffer *bo;
       unsigned int offset;
@@ -668,6 +684,14 @@ struct brw_context
       int index;
       GLboolean active;
    } query;
+
+   struct {
+      unsigned always_emit_state:1;
+      unsigned always_flush_batch:1;
+      unsigned force_swtnl:1;
+      unsigned no_swtnl:1;
+   } flags;
+
    /* Used to give every program string a unique id
     */
    GLuint program_id;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index edc39ff223..278ffa4ca2 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -48,7 +48,7 @@
  * constants.  That greatly reduces the demand for space in the CURBE.
  * Some of the comments within are dated...
  */
-static void calculate_curbe_offsets( struct brw_context *brw )
+static int calculate_curbe_offsets( struct brw_context *brw )
 {
    /* CACHE_NEW_WM_PROG */
    const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@@ -104,6 +104,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
       brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
    }
+
+   return 0;
 }
 
 
@@ -157,7 +159,7 @@ static GLfloat fixed_plane[6][4] = {
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
-static void prepare_constant_buffer(struct brw_context *brw)
+static int prepare_constant_buffer(struct brw_context *brw)
 {
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
@@ -170,7 +172,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 brw->curbe.last_buf = NULL;
 	 brw->curbe.last_bufsz  = 0;
       }
-      return;
+      return 0;
    }
 
    buf = (GLfloat *) CALLOC(bufsz, 1);
@@ -305,9 +307,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
     * flushes as necessary when doublebuffering of CURBEs isn't
     * possible.
     */
+
+   return 0;
 }
 
-static void emit_constant_buffer(struct brw_context *brw)
+static int emit_constant_buffer(struct brw_context *brw)
 {
    GLuint sz = brw->curbe.total_size;
 
@@ -322,6 +326,7 @@ static void emit_constant_buffer(struct brw_context *brw)
 		(sz - 1) + brw->curbe.curbe_offset);
    }
    ADVANCE_BATCH();
+   return 0;
 }
 
 const struct brw_tracked_state brw_constant_buffer = {
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 7af490bc5a..b5fe7c9601 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -26,15 +26,18 @@
  **************************************************************************/
 
 
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_debug.h"
+#include "brw_screen.h"
 
 #include "brw_batchbuffer.h"
 
-#define FILE_DEBUG_FLAG DEBUG_BATCH
 
 static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
    _3DPRIM_POINTLIST,
@@ -56,18 +59,21 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
  * programs be immune to the active primitive (ie. cope with all
  * possibilities).  That may not be realistic however.
  */
-static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
+static int brw_set_prim(struct brw_context *brw, unsigned prim )
 {
 
    if (BRW_DEBUG & DEBUG_PRIMS)
       debug_printf("PRIM: %s\n", u_prim_name(prim));
    
    if (prim != brw->primitive) {
+      unsigned reduced_prim;
+
       brw->primitive = prim;
       brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
 
-      if (reduced_prim[prim] != brw->reduced_primitive) {
-	 brw->reduced_primitive = reduced_prim[prim];
+      reduced_prim = u_reduced_prim(prim);
+      if (reduced_prim != brw->reduced_primitive) {
+	 brw->reduced_primitive = reduced_prim;
 	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
       }
    }
@@ -77,17 +83,14 @@ static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
 
 
 
-static enum pipe_error brw_emit_prim(struct brw_context *brw,
-				     unsigned prim,
-				     unsigned start,
-				     unsigned count,
-				     boolean indexed,
-				     uint32_t hw_prim)
+static int brw_emit_prim(struct brw_context *brw,
+			 unsigned start,
+			 unsigned count,
+			 boolean indexed,
+			 uint32_t hw_prim)
 {
    struct brw_3d_primitive prim_packet;
-
-   if (INTEL_DEBUG & DEBUG_PRIMS)
-      debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count);
+   int ret;
 
    prim_packet.header.opcode = CMD_3D_PRIM;
    prim_packet.header.length = sizeof(prim_packet)/4 - 2;
@@ -101,7 +104,7 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
       prim_packet.start_vert_location += brw->ib.start_vertex_offset;
    prim_packet.instance_count = 1;
    prim_packet.start_instance_location = 0;
-   prim_packet.base_vert_location = prim->basevertex;
+   prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium
 
 
    /* If we're set to always flush, do it before and after the primitive emit.
@@ -109,20 +112,20 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
     * and missed flushes of the render cache as it heads to other parts of
     * the besides the draw code.
     */
-   if (intel->always_flush_cache) {
-      BEGIN_BATCH(1, IGNORE_CLIPRECTS)
-      OUT_BATCH(intel->vtbl.flush_cmd());
+   if (0) {
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       ADVANCE_BATCH();
    }
    if (prim_packet.verts_per_instance) {
-      ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet,
+      ret = brw_batchbuffer_data( brw->batch, &prim_packet,
 				  sizeof(prim_packet), LOOP_CLIPRECTS);
       if (ret)
 	 return ret;
    }
-   if (intel->always_flush_cache) {
+   if (0) {
       BEGIN_BATCH(1, IGNORE_CLIPRECTS);
-      OUT_BATCH(intel->vtbl.flush_cmd());
+      OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       ADVANCE_BATCH();
    }
 
@@ -133,44 +136,24 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
-static GLboolean brw_try_draw_prims( struct brw_context *brw,
-				     const struct gl_client_array *arrays[],
-				     const struct _mesa_prim *prim,
-				     GLuint nr_prims,
-				     const struct _mesa_index_buffer *ib,
-				     GLuint min_index,
-				     GLuint max_index )
+static int
+try_draw_range_elements(struct brw_context *brw,
+			struct pipe_buffer *index_buffer,
+			unsigned hw_prim, 
+			unsigned start, unsigned count)
 {
-   struct brw_context *brw = brw_context(ctx);
-   GLboolean retval = GL_FALSE;
-   GLboolean warn = GL_FALSE;
-   GLboolean first_time = GL_TRUE;
-   uint32_t hw_prim;
-   GLuint i;
-
-   if (ctx->NewState)
-      _mesa_update_state( ctx );
-
-   /* Bind all inputs, derive varying and size information:
-    */
-   brw_merge_inputs( brw, arrays );
-
-   brw->ib.ib = ib;
-   brw->state.dirty.brw |= BRW_NEW_INDICES;
-
-   brw->vb.min_index = min_index;
-   brw->vb.max_index = max_index;
-   brw->state.dirty.brw |= BRW_NEW_VERTICES;
-
-   hw_prim = brw_set_prim(brw, prim[i].mode);
+   int ret;
 
-   brw_validate_state(brw);
+   ret = brw_validate_state(brw);
+   if (ret)
+      return ret;
 
    /* Check that we can fit our state in with our existing batchbuffer, or
     * flush otherwise.
     */
-   ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos,
-					 brw->state.validated_bo_count);
+   ret = brw->sws->check_aperture_space(brw->sws,
+					brw->state.validated_bos,
+					brw->state.validated_bo_count);
    if (ret)
       return ret;
 
@@ -178,12 +161,12 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw,
    if (ret)
       return ret;
    
-   ret = brw_emit_prim(brw, &prim[i], hw_prim);
+   ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim);
    if (ret)
       return ret;
 
-   if (intel->always_flush_batch)
-      brw_batchbuffer_flush(intel->batch);
+   if (brw->flags.always_flush_batch)
+      brw_batchbuffer_flush(brw->batch);
 
    return 0;
 }
@@ -197,22 +180,45 @@ brw_draw_range_elements(struct pipe_context *pipe,
 			unsigned max_index,
 			unsigned mode, unsigned start, unsigned count)
 {
-   enum pipe_error ret;
+   struct brw_context *brw = brw_context(pipe);
+   int ret;
+   uint32_t hw_prim;
+
+   hw_prim = brw_set_prim(brw, mode);
 
-   if (!vbo_all_varyings_in_vbos(arrays)) {
-      if (!index_bounds_valid)
-	 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count);
+
+   /* Potentially trigger upload of new index buffer.
+    *
+    * XXX: do we need to go through state validation to achieve this?
+    * Could just call upload code directly.
+    */
+   if (brw->curr.index_buffer != index_buffer) {
+      pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
+   }
+
+   /* XXX: do we really care?
+    */
+   if (brw->curr.min_index != min_index ||
+       brw->curr.max_index != max_index) 
+   { 
+      brw->curr.min_index = min_index;
+      brw->curr.max_index = max_index;
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
    }
 
+
    /* Make a first attempt at drawing:
     */
-   ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+   ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
 
    /* Otherwise, flush and retry:
     */
    if (ret != 0) {
-      brw_batchbuffer_flush(intel->batch);
-      ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+      brw_batchbuffer_flush(brw->batch);
+      ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
       assert(ret == 0);
    }
 
@@ -242,28 +248,37 @@ brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
 
 
 
-void brw_draw_init( struct brw_context *brw )
+boolean brw_draw_init( struct brw_context *brw )
 {
    /* Register our drawing function: 
     */
    brw->base.draw_arrays = brw_draw_arrays;
    brw->base.draw_elements = brw_draw_elements;
    brw->base.draw_range_elements = brw_draw_range_elements;
-}
 
-void brw_draw_destroy( struct brw_context *brw )
-{
-   int i;
+   /* Create helpers for uploading data in user buffers:
+    */
+   brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base,
+					    128 * 1024,
+					    64,
+					    PIPE_BUFFER_USAGE_VERTEX );
+   if (brw->vb.upload_vertex == NULL)
+      return FALSE;
+
+   brw->vb.upload_index = u_upload_create( &brw->brw_screen->base,
+					   128 * 1024,
+					   64,
+					   PIPE_BUFFER_USAGE_INDEX );
+   if (brw->vb.upload_index == NULL)
+      return FALSE;
 
-   if (brw->vb.upload.bo != NULL) {
-      brw->sws->bo_unreference(brw->vb.upload.bo);
-      brw->vb.upload.bo = NULL;
-   }
+   return TRUE;
+}
 
-   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
-      brw->vb.inputs[i].bo = NULL;
-   }
+void brw_draw_cleanup( struct brw_context *brw )
+{
+   u_upload_destroy( brw->vb.upload_vertex );
+   u_upload_destroy( brw->vb.upload_index );
 
    brw->sws->bo_unreference(brw->ib.bo);
    brw->ib.bo = NULL;
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
index 13f0443a81..8dc5dbce62 100644
--- a/src/gallium/drivers/i965/brw_draw.h
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -32,8 +32,7 @@
 
 struct brw_context;
 
-
-void brw_draw_init( struct brw_context *brw );
+boolean brw_draw_init( struct brw_context *brw );
 void brw_draw_cleanup( struct brw_context *brw );
 
 
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 7b0860d04c..040d8ca93a 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -26,21 +26,23 @@
  **************************************************************************/
 
 #include "pipe/p_context.h"
+#include "pipe/p_error.h"
 
 #include "util/u_upload_mgr.h"
+#include "util/u_math.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
-
+#include "brw_screen.h"
 #include "brw_batchbuffer.h"
+#include "brw_debug.h"
 
 
 
 
-unsigned brw_translate_surface_format( unsigned id )
+static unsigned brw_translate_surface_format( unsigned id )
 {
    switch (id) {
    case PIPE_FORMAT_R64_FLOAT:
@@ -186,70 +188,136 @@ static unsigned get_index_type(int type)
 }
 
 
-
-static boolean brw_prepare_vertices(struct brw_context *brw)
+static int brw_prepare_vertices(struct brw_context *brw)
 {
-   GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; 
+   unsigned int min_index = brw->curr.min_index;
+   unsigned int max_index = brw->curr.max_index;
    GLuint i;
-   const unsigned char *ptr = NULL;
-   GLuint interleave = 0;
-   unsigned int min_index = brw->vb.min_index;
-   unsigned int max_index = brw->vb.max_index;
-
-   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
-   GLuint nr_uploads = 0;
-
-   /* First build an array of pointers to ve's in vb.inputs_read
-    */
-   if (0)
-      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
-
+   int ret;
 
+   if (BRW_DEBUG & DEBUG_VERTS)
+      debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
 
-   for (i = 0; i < brw->vb.num_vertex_buffer; i++) {
-      struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i];
-      unsigned size = (vb->stride == 0 ? 
-		       vb->size :
-		       vb->stride * (max_index + 1 - min_index));
 
-
-      if (brw_is_user_buffer(vb)) {
-	 u_upload_buffer( brw->upload_vertex, 
-			  min_index * vb->stride,
-			  size,
-			  &offset,
-			  &buffer );
+   for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+      struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
+      struct brw_winsys_buffer *bo;
+      struct pipe_buffer *upload_buf;
+      unsigned offset;
+      
+      if (BRW_DEBUG & DEBUG_VERTS)
+	 debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
+		      __FUNCTION__, i,
+		      brw_buffer_is_user_buffer(vb->buffer),
+		      vb->buffer_offset,
+		      vb->buffer->size,
+		      vb->stride);
+
+      if (brw_buffer_is_user_buffer(vb->buffer)) {
+
+	 /* XXX: simplify this.  Stop the state trackers from generating
+	  * zero-stride buffers & have them use additional constants (or
+	  * add support for >1 constant buffer) instead.
+	  */
+	 unsigned size = (vb->stride == 0 ? 
+			  vb->buffer->size - vb->buffer_offset :
+			  MAX2(vb->buffer->size - vb->buffer_offset,
+			       vb->stride * (max_index + 1 - min_index)));
+
+	 ret = u_upload_buffer( brw->vb.upload_vertex, 
+				vb->buffer_offset + min_index * vb->stride,
+				size,
+				vb->buffer,
+				&offset,
+				&upload_buf );
+	 if (ret)
+	    return ret;
+
+	 bo = brw_buffer(upload_buf)->bo;
+	 
+	 assert(offset + size <= bo->size);
       }
       else
       {
-	 offset = 0;
-	 buffer = vb->buffer;
+	 offset = vb->buffer_offset;
+	 bo = brw_buffer(vb->buffer)->bo;
       }
+
+      assert(offset < bo->size);
       
       /* Set up post-upload info about this vertex buffer:
        */
-      input->offset = (unsigned long)offset;
-      input->stride = vb->stride;
-      input->count = count;
-      brw->sws->bo_unreference(input->bo);
-      input->bo = intel_bufferobj_buffer(intel, intel_buffer,
-					 INTEL_READ);
-      brw->sws->bo_reference(input->bo);
-
-      assert(input->offset < input->bo->size);
-      assert(input->offset + size <= input->bo->size);
+      brw->vb.vb[i].offset = offset;
+      brw->vb.vb[i].stride = vb->stride;
+      brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
+				    1 :
+				    (bo->size - offset) / vb->stride);
+      brw->sws->bo_unreference(brw->vb.vb[i].bo);
+      brw->vb.vb[i].bo = bo;
+      brw->sws->bo_reference(brw->vb.vb[i].bo);
+
+      /* Don't need to retain this reference.  We have a reference on
+       * the underlying winsys buffer:
+       */
+      pipe_buffer_reference( &upload_buf, NULL );
    }
 
+   brw->vb.nr_vb = i;
    brw_prepare_query_begin(brw);
 
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
+   for (i = 0; i < brw->vb.nr_vb; i++) {
+      brw_add_validated_bo(brw, brw->vb.vb[i].bo);
+   }
+
+   return 0;
+}
+
+static int brw_emit_vertex_buffers( struct brw_context *brw )
+{
+   int i;
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), just bail.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   if (brw->vb.nr_vb == 0) {
+      if (BRW_DEBUG & DEBUG_VERTS)
+	 debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
 
-      brw_add_validated_bo(brw, input->bo);
+      return 0;
+   }
+
+   /* Emit VB state packets.
+    */
+   BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+	     ((1 + brw->vb.nr_vb * 4) - 2));
+
+   for (i = 0; i < brw->vb.nr_vb; i++) {
+      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+		BRW_VB0_ACCESS_VERTEXDATA |
+		(brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
+      OUT_RELOC(brw->vb.vb[i].bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		brw->vb.vb[i].offset);
+      if (BRW_IS_IGDNG(brw)) {
+	 OUT_RELOC(brw->vb.vb[i].bo,
+		   I915_GEM_DOMAIN_VERTEX, 0,
+		   brw->vb.vb[i].bo->size - 1);
+      } else
+	 OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
+      OUT_BATCH(0); /* Instance data step rate */
    }
+   ADVANCE_BATCH();
+   return 0;
 }
 
-static void brw_emit_vertices(struct brw_context *brw)
+
+
+
+static int brw_emit_vertex_elements(struct brw_context *brw)
 {
    GLuint i;
 
@@ -262,7 +330,7 @@ static void brw_emit_vertices(struct brw_context *brw)
     * The stale VB state stays in place, but they don't do anything unless
     * a VE loads from them.
     */
-   if (brw->vb.nr_enabled == 0) {
+   if (brw->vb.nr_ve == 0) {
       BEGIN_BATCH(3, IGNORE_CLIPRECTS);
       OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
       OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
@@ -274,59 +342,23 @@ static void brw_emit_vertices(struct brw_context *brw)
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
       ADVANCE_BATCH();
-      return;
+      return 0;
    }
 
-   /* Now emit VB and VEP state packets.
+   /* Now emit vertex element (VEP) state packets.
     *
-    * This still defines a hardware VB for each input, even if they
-    * are interleaved or from the same VBO.  TBD if this makes a
-    * performance difference.
     */
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
-	     ((1 + brw->vb.nr_enabled * 4) - 2));
-
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-
-      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
-		BRW_VB0_ACCESS_VERTEXDATA |
-		(input->stride << BRW_VB0_PITCH_SHIFT));
-      OUT_RELOC(input->bo,
-		I915_GEM_DOMAIN_VERTEX, 0,
-		input->offset);
-      if (BRW_IS_IGDNG(brw)) {
-          if (input->stride) {
-              OUT_RELOC(input->bo,
-                        I915_GEM_DOMAIN_VERTEX, 0,
-                        input->offset + input->stride * input->count - 1);
-          } else {
-              assert(input->count == 1);
-              OUT_RELOC(input->bo,
-                        I915_GEM_DOMAIN_VERTEX, 0,
-                        input->offset + input->element_size - 1);
-          }
-      } else
-          OUT_BATCH(input->stride ? input->count : 0);
-      OUT_BATCH(0); /* Instance data step rate */
-   }
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-      uint32_t format = get_surface_type(input->glarray->Type,
-					 input->glarray->Size,
-					 input->glarray->Format,
-					 input->glarray->Normalized);
+   BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2));
+   for (i = 0; i < brw->vb.nr_ve; i++) {
+      const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
+      uint32_t format = brw_translate_surface_format( input->src_format );
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
 
-      switch (input->glarray->Size) {
+      switch (input->nr_components) {
       case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
       case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
       case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
@@ -352,11 +384,29 @@ static void brw_emit_vertices(struct brw_context *brw)
                     ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
    }
    ADVANCE_BATCH();
+   return 0;
+}
+
+
+static int brw_emit_vertices( struct brw_context *brw )
+{
+   int ret;
+
+   ret = brw_emit_vertex_buffers( brw );
+   if (ret)
+      return ret;
+
+   ret = brw_emit_vertex_elements( brw );
+   if (ret)
+      return ret;
+   
+   return 0;
 }
 
+
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
-      .mesa = 0,
+      .mesa = PIPE_NEW_INDEX_RANGE,
       .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
       .cache = 0,
    },
@@ -364,104 +414,106 @@ const struct brw_tracked_state brw_vertices = {
    .emit = brw_emit_vertices,
 };
 
-static void brw_prepare_indices(struct brw_context *brw)
+
+static int brw_prepare_indices(struct brw_context *brw)
 {
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint ib_size;
+   struct pipe_buffer *index_buffer = brw->curr.index_buffer;
    struct brw_winsys_buffer *bo = NULL;
-   struct gl_buffer_object *bufferobj;
    GLuint offset;
-   GLuint ib_type_size;
+   GLuint index_size;
+   GLuint ib_size;
+   int ret;
 
    if (index_buffer == NULL)
-      return;
+      return 0;
 
-   ib_type_size = get_size(index_buffer->type);
-   ib_size = ib_type_size * index_buffer->count;
-   bufferobj = index_buffer->obj;;
+   if (DEBUG & DEBUG_VERTS)
+      debug_printf("%s: index_size:%d index_buffer->size:%d\n",
+		   __FUNCTION__,
+		   brw->curr.index_size,
+		   brw->curr.index_buffer->size);
 
-   /* Turn into a proper VBO:
-    */
-   if (!_mesa_is_bufferobj(bufferobj)) {
-      brw->ib.start_vertex_offset = 0;
+   ib_size = index_buffer->size;
+   index_size = brw->curr.index_size;
 
-      /* Get new bufferobj, offset:
-       */
-      get_space(brw, ib_size, &bo, &offset);
-
-      /* Straight upload
+   /* Turn userbuffer into a proper hardware buffer?
+    */
+   if (brw_buffer_is_user_buffer(index_buffer)) {
+      struct pipe_buffer *upload_buf;
+
+      ret = u_upload_buffer( brw->vb.upload_index,
+			     0,
+			     ib_size,
+			     index_buffer,
+			     &offset,
+			     &upload_buf );
+      if (ret)
+	 return ret;
+
+      bo = brw_buffer(upload_buf)->bo;
+      brw->sws->bo_reference(bo);
+      pipe_buffer_reference( &upload_buf, NULL );
+
+      /* XXX: annotate the userbuffer with the upload information so
+       * that successive calls don't get re-uploaded.
        */
-      brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-
-   } else {
-      offset = (GLuint) (unsigned long) index_buffer->ptr;
-      brw->ib.start_vertex_offset = 0;
+   }
+   else {
+      bo = brw_buffer(index_buffer)->bo;
+      brw->sws->bo_reference(bo);
+      
+      ib_size = bo->size;
+      offset = 0;
+   }
 
-      /* If the index buffer isn't aligned to its element size, we have to
-       * rebase it into a temporary.
-       */
-       if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                                GL_DYNAMIC_DRAW_ARB,
-                                                bufferobj);
-           map += offset;
-
-	   get_space(brw, ib_size, &bo, &offset);
-
-	   dri_bo_subdata(bo, offset, ib_size, map);
-
-           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
-       } else {
-	  bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
-				      INTEL_READ);
-	  brw->sws->bo_reference(bo);
-
-	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
-	   * the index buffer state when we're just moving the start index
-	   * of our drawing.
-	   */
-	  brw->ib.start_vertex_offset = offset / ib_type_size;
-	  offset = 0;
-	  ib_size = bo->size;
-       }
+   /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the
+    * index buffer state when we're just moving the start index of our
+    * drawing.
+    *
+    * In gallium this will happen in the case where successive draw
+    * calls are made with (distinct?) userbuffers, but the upload_mgr
+    * places the data into a single winsys buffer.
+    * 
+    * This statechange doesn't raise any state flags and is always
+    * just merged into the final draw packet:
+    */
+   if (1) {
+      assert((offset & (index_size - 1)) == 0);
+      brw->ib.start_vertex_offset = offset / index_size;
    }
 
+   /* These statechanges trigger a new CMD_INDEX_BUFFER packet:
+    */
    if (brw->ib.bo != bo ||
-       brw->ib.offset != offset ||
        brw->ib.size != ib_size)
    {
-      drm_intel_bo_unreference(brw->ib.bo);
+      brw->sws->bo_unreference(brw->ib.bo);
       brw->ib.bo = bo;
-      brw->ib.offset = offset;
       brw->ib.size = ib_size;
-
       brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
-   } else {
-      drm_intel_bo_unreference(bo);
+   }
+   else {
+      brw->sws->bo_unreference(bo);
    }
 
    brw_add_validated_bo(brw, brw->ib.bo);
+   return 0;
 }
 
 const struct brw_tracked_state brw_indices = {
    .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_INDICES,
+      .mesa = PIPE_NEW_INDEX_BUFFER,
+      .brw = 0,
       .cache = 0,
    },
    .prepare = brw_prepare_indices,
 };
 
-static void brw_emit_index_buffer(struct brw_context *brw)
+static int brw_emit_index_buffer(struct brw_context *brw)
 {
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-
-   if (index_buffer == NULL)
-      return;
-
    /* Emit the indexbuffer packet:
     */
+   if (brw->ib.bo)
    {
       struct brw_indexbuffer ib;
 
@@ -469,7 +521,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
 
       ib.header.bits.opcode = CMD_INDEX_BUFFER;
       ib.header.bits.length = sizeof(ib)/4 - 2;
-      ib.header.bits.index_format = get_index_type(index_buffer->type);
+      ib.header.bits.index_format = get_index_type(brw->ib.size);
       ib.header.bits.cut_index_enable = 0;
 
       BEGIN_BATCH(4, IGNORE_CLIPRECTS);
@@ -483,6 +535,8 @@ static void brw_emit_index_buffer(struct brw_context *brw)
       OUT_BATCH( 0 );
       ADVANCE_BATCH();
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_index_buffer = {
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
index 1df561386e..df49d4b72f 100644
--- a/src/gallium/drivers/i965/brw_eu.c
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -29,6 +29,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
   
+#include "util/u_memory.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
@@ -237,7 +238,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_call *call, *next;
         for (call = c->first_call; call; call = next) {
 	    next = call->next;
-	    _mesa_free(call);
+	    FREE(call);
 	}
 	c->first_call = NULL;
     }
@@ -247,7 +248,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_label *label, *next;
 	for (label = c->first_label; label; label = next) {
 	    next = label->next;
-	    _mesa_free(label);
+	    FREE(label);
 	}
 	c->first_label = NULL;
     }
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
index ad7ec36e86..5989f5a04e 100644
--- a/src/gallium/drivers/i965/brw_eu_debug.c
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -28,7 +28,8 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-    
+ 
+#include "util/u_debug.h"
 
 #include "brw_eu.h"
 
@@ -52,7 +53,7 @@ void brw_print_reg( struct brw_reg hwreg )
       "f"
    };
 
-   _mesa_printf("%s%s", 
+   debug_printf("%s%s", 
 		hwreg.abs ? "abs/" : "",
 		hwreg.negate ? "-" : "");
      
@@ -64,7 +65,7 @@ void brw_print_reg( struct brw_reg hwreg )
        hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
        hwreg.type == BRW_REGISTER_TYPE_F) {
       /* vector register */
-      _mesa_printf("vec%d", hwreg.nr);
+      debug_printf("vec%d", hwreg.nr);
    }
    else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
 	    hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
@@ -72,13 +73,13 @@ void brw_print_reg( struct brw_reg hwreg )
 	    hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
 	    hwreg.type == BRW_REGISTER_TYPE_F) {      
       /* "scalar" register */
-      _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
    }
    else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
-      _mesa_printf("imm %f", hwreg.dw1.f);
+      debug_printf("imm %f", hwreg.dw1.f);
    }
    else {
-      _mesa_printf("%s%d.%d<%d;%d,%d>:%s", 
+      debug_printf("%s%d.%d<%d;%d,%d>:%s", 
 		   file[hwreg.file],
 		   hwreg.nr,
 		   hwreg.subnr / type_sz(hwreg.type),
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 0f2612c181..98fec85c1d 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -315,24 +315,20 @@ const struct brw_tracked_state brw_polygon_stipple = {
 
 static void upload_polygon_stipple_offset(struct brw_context *brw)
 {
-   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
    struct brw_polygon_stipple_offset bpso;
 
    memset(&bpso, 0, sizeof(bpso));
    bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
    bpso.header.length = sizeof(bpso)/4-2;
 
-   /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
-    * we have to invert the Y axis in order to match the OpenGL
-    * pixel coordinate system, and our offset must be matched
-    * to the window position.  If we're drawing to a FBO
-    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
-    * system works just fine, and there's no window system to
-    * worry about.
+   /* Never need to offset stipple coordinates.
+    *
+    * XXX: is it ever necessary to invert Y values?
     */
-   if (brw->intel.ctx.DrawBuffer->Name == 0) {
-      bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
-      bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+   if (0) {
+      int x = 0, y = 0, h = 0;
+      bpso.bits0.x_offset = (32 - (x & 31)) & 31;
+      bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31;
    }
    else {
       bpso.bits0.y_offset = 0;
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index e85a1a9c1b..65e7151517 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -53,6 +53,9 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence )
 static GLuint brw_flush_cmd( void )
 {
    struct brw_mi_flush flush;
+
+   return ;
+
    flush.opcode = CMD_MI_FLUSH;
    flush.pad = 0;
    flush.flags = BRW_FLUSH_STATE_CACHE;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index fbb772d18c..8b61da763c 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -33,6 +33,25 @@
 #include "brw_util.h"
 #include "brw_wm.h"
 
+
+/**
+ * Determine if the given fragment program uses GLSL features such
+ * as flow conditionals, loops, subroutines.
+ * Some GLSL shaders may use these features, others might not.
+ */
+GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp)
+{
+    return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 ||
+	    fp->info.insn_count[TGSI_OPCODE_IF] > 0 ||
+	    fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
+	    fp->info.insn_count[TGSI_OPCODE_CAL] > 0 ||
+	    fp->info.insn_count[TGSI_OPCODE_BRK] > 0 ||   /* redundant - BGNLOOP */
+	    fp->info.insn_count[TGSI_OPCODE_RET] > 0 ||	  /* redundant - CAL */
+	    fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0);
+}
+
+
+
 static void brwBindProgram( struct brw_context *brw,
 			    GLenum target, 
 			    struct gl_program *prog )
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index b0928adbe4..d1d0d7cd43 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,26 +1,11 @@
 
-static void brw_merge_inputs( struct brw_context *brw,
-		       const struct gl_client_array *arrays[])
-{
-   struct brw_vertex_info old = brw->vb.info;
-   GLuint i;
-
-   for (i = 0; i < VERT_ATTRIB_MAX; i++)
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
 
-   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
-   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
 
+void 
+brw_pipe_vertex_cleanup( struct brw_context *brw )
+{
    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      brw->vb.inputs[i].glarray = arrays[i];
-      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
-
-      if (arrays[i]->StrideB != 0)
-	 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
-	    ((i%16) * 2);
+      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
+      brw->vb.inputs[i].bo = NULL;
    }
-
-   /* Raise statechanges if input sizes have changed. */
-   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
-      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
 }
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 79d595d0ad..b0be0e1f8a 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -56,6 +56,14 @@ struct brw_transfer
    unsigned offset;
 };
 
+struct brw_buffer
+{
+   struct pipe_buffer base;
+   struct brw_winsys_buffer *bo;
+   void *ptr;
+   boolean is_user_buffer;
+};
+
 
 /*
  * Cast wrappers
@@ -72,5 +80,19 @@ brw_transfer(struct pipe_transfer *transfer)
    return (struct brw_transfer *)transfer;
 }
 
+static INLINE struct brw_buffer *
+brw_buffer(struct pipe_buffer *buffer)
+{
+   return (struct brw_buffer *)buffer;
+}
+
+
+/* Pipe buffer helpers
+ */
+static INLINE boolean
+brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
+{
+   return ((const struct brw_buffer *)buf)->is_user_buffer;
+}
 
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 53e8f09e37..e2db2e76e6 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -134,7 +134,7 @@ static void upload_sf_prog(struct brw_context *brw)
    key.attrs = brw->vs.prog_data->outputs_written; 
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
-   switch (brw->intel.reduced_primitive) {
+   switch (brw->reduced_primitive) {
    case GL_TRIANGLES: 
       /* NOTE: We just use the edgeflag attribute as an indicator that
        * unfilled triangles are active.  We don't actually do the
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 0e406f12e1..648a16a038 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -40,19 +40,12 @@ static void upload_sf_vp(struct brw_context *brw)
    const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    struct brw_sf_viewport sfv;
    GLfloat y_scale, y_bias;
-   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
    const GLfloat *v = ctx->Viewport._WindowMap.m;
 
    memset(&sfv, 0, sizeof(sfv));
 
-   if (render_to_fbo) {
-      y_scale = 1.0;
-      y_bias = 0;
-   }
-   else {
-      y_scale = -1.0;
-      y_bias = ctx->DrawBuffer->Height;
-   }
+   y_scale = 1.0;
+   y_bias = 0;
 
    /* _NEW_VIEWPORT */
 
@@ -73,20 +66,11 @@ static void upload_sf_vp(struct brw_context *brw)
     * Note that the hardware's coordinates are inclusive, while Mesa's min is
     * inclusive but max is exclusive.
     */
-   if (render_to_fbo) {
-      /* texmemory: Y=0=bottom */
-      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-      sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
-      sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
-   }
-   else {
-      /* memory: Y=0=top */
-      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-      sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
-      sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
-   }
+   /* Y=0=bottom */
+   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+   sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+   sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
 
    brw->sws->bo_unreference(brw->sf.vp_bo);
    brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
@@ -151,7 +135,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
    /* _NEW_LIGHT */
    key->provoking_vertex = ctx->Light.ProvokingVertex;
 
-   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   key->render_to_fbo = 1;
 }
 
 static struct brw_winsys_buffer *
@@ -211,11 +195,6 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
-   /* The viewport is inverted for rendering to a FBO, and that inverts
-    * polygon front/back orientation.
-    */
-   sf.sf5.front_winding ^= key->render_to_fbo;
-
    switch (key->cull_face) {
    case GL_FRONT:
       sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
@@ -245,7 +224,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
        sf.sf6.line_width = 0;
 
    /* _NEW_BUFFERS */
-   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   key->render_to_fbo = 1;
    if (!key->render_to_fbo) {
       /* Rendering to an OpenGL window */
       sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 02657eaba7..9bf34c3fe4 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -109,8 +109,8 @@ struct brw_surface_key {
 /***********************************************************************
  * brw_state.c
  */
-void brw_validate_state(struct brw_context *brw);
-void brw_upload_state(struct brw_context *brw);
+int brw_validate_state(struct brw_context *brw);
+int brw_upload_state(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
 
@@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
index b285837070..324fce5163 100644
--- a/src/gallium/drivers/i965/brw_state_batch.c
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
       return GL_TRUE;
    }
 
@@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
  emit:
    memcpy(item->header, newheader, sz);
-   brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
    return GL_TRUE;
 }
 
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
index 83f138f67a..d2df8af9f4 100644
--- a/src/gallium/drivers/i965/brw_swtnl.c
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -12,13 +12,13 @@ static GLboolean check_fallbacks( struct brw_context *brw,
     * use fallbacks.  If we're forcing fallbacks, always
     * use fallfacks.
     */
-   if (brw->intel.conformance_mode == 0)
+   if (brw->flags.no_swtnl)
       return GL_FALSE;
 
-   if (brw->intel.conformance_mode == 2)
+   if (brw->flags.force_swtnl)
       return GL_TRUE;
 
-   if (ctx->Polygon.SmoothFlag) {
+   if (brw->curr.rast->tmpl.smooth_polys) {
       for (i = 0; i < nr_prims; i++)
 	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
 	    return GL_TRUE;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 82cd8007ac..51e23b9640 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -161,6 +161,13 @@ struct brw_winsys_screen {
 		      size_t size,
 		      const void *data);
 
+   /* XXX: couldn't this be handled by returning true/false on
+    * bo_emit_reloc?
+    */
+   boolean (*check_aperture_space)( struct brw_winsys_screen *iws,
+				    struct brw_winsys_buffer **buffers,
+				    unsigned count );
+
    /**
     * Map a buffer.
     */
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 284cf42f8b..4948ea0dff 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -158,7 +158,7 @@ static void do_wm_prog( struct brw_context *brw,
    memcpy(&c->key, key, sizeof(*key));
 
    c->fp = fp;
-   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+   c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
 
    brw_init_compile(brw, &c->func);
 
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 18775830f9..e06de95a8a 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -131,9 +131,9 @@ struct brw_wm_ref {
    GLuint insn:24;
 };
 
-struct brw_wm_constref {
+struct brw_wm_imm_ref {
    const struct brw_wm_ref *ref;
-   GLfloat constval;
+   GLfloat imm1f;
 };
 
 
@@ -232,8 +232,8 @@ struct brw_wm_compile {
    struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
    GLuint nr_insns;
 
-   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
-   GLuint nr_constrefs;
+   struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST];
+   GLuint nr_imm_refs;
 
    struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
 
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index c4f0711793..a8de5fdd0b 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -7,34 +7,6 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
                                   const struct prog_instruction *inst,
                                   GLuint component);
 
-/**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
- */
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
-{
-    int i;
-
-    for (i = 0; i < fp->Base.NumInstructions; i++) {
-	const struct prog_instruction *inst = &fp->Base.Instructions[i];
-	switch (inst->Opcode) {
-	    case OPCODE_ARL:
-	    case OPCODE_IF:
-	    case OPCODE_ENDIF:
-	    case OPCODE_CAL:
-	    case OPCODE_BRK:
-	    case OPCODE_RET:
-	    case OPCODE_BGNLOOP:
-		return GL_TRUE; 
-	    default:
-		break;
-	}
-    }
-    return GL_FALSE; 
-}
-
-
 
 static void
 reclaim_temps(struct brw_wm_compile *c);
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index de5f5fe821..31b0270e84 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -124,33 +124,33 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
 }
 
 
-/** Return a ref to a constant/literal value */
-static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
-					       const GLfloat *constval )
+/** Return a ref to an immediate value */
+static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
+					     const GLfloat *imm1f )
 {
    GLuint i;
 
    /* Search for an existing const value matching the request:
     */
-   for (i = 0; i < c->nr_constrefs; i++) {
-      if (c->constref[i].constval == *constval) 
-	 return c->constref[i].ref;
+   for (i = 0; i < c->nr_imm_refs; i++) {
+      if (c->imm_ref[i].imm_val == *imm1f) 
+	 return c->imm_ref[i].ref;
    }
 
    /* Else try to add a new one:
     */
-   if (c->nr_constrefs < BRW_WM_MAX_CONST) {
-      GLuint i = c->nr_constrefs++;
+   if (c->nr_imm_refs < BRW_WM_MAX_IMM) {
+      GLuint i = c->nr_imm_refs++;
 
-      /* A constant is a special type of parameter:
+      /* An immediate is a special type of parameter:
        */
-      c->constref[i].constval = *constval;
-      c->constref[i].ref = get_param_ref(c, constval);
+      c->imm_ref[i].imm_val = *imm_val;
+      c->imm_ref[i].ref = get_param_ref(c, imm_val);
 
-      return c->constref[i].ref;
+      return c->imm_ref[i].ref;
    }
    else {
-      _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+      _mesa_printf("%s: out of imm_refs\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
@@ -200,7 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 case PROGRAM_CONSTANT:
 	    /* These are invarient:
 	     */
-	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
 	    break;
 
 	 case PROGRAM_STATE_VAR:
@@ -266,9 +266,9 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
    static const GLfloat const_one = 1.0;
 
    if (component == SWIZZLE_ZERO) 
-      src_ref = get_const_ref(c, &const_zero);
+      src_ref = get_imm_ref(c, &const_zero);
    else if (component == SWIZZLE_ONE) 
-      src_ref = get_const_ref(c, &const_one);
+      src_ref = get_imm_ref(c, &const_one);
    else 
       src_ref = pass0_get_reg(c, src.File, src.Index, component);
author	Keith Whitwell <keithw@vmware.com>	2009-10-25 00:02:16 +0100
committer	Keith Whitwell <keithw@vmware.com>	2009-10-25 00:02:16 +0100
commit	4dd2f6640b70e2313f8771f7588aa49a861153aa (patch)
tree	d2cbc2b694839380de8e31a8a72f8c3d8f7c5eb1 /src/gallium/drivers/i965
parent	4f7931bb3554cb1839adc2044e3abe6d4af8b0b5 (diff)