Diffstat (limited to 'src/mesa/drivers/dri/intel')
34 files changed, 1328 insertions, 1119 deletions
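Note on the change below: the batchbuffer stops being a separately allocated, GTT-mapped object (struct intel_batchbuffer *batch) and becomes a fixed-size dword array embedded directly in intel_context (uint32_t map[8192] plus a `used` counter). Commands accumulate in that array and are only copied into a BO with drm_intel_bo_subdata() and submitted with drm_intel_bo_mrb_exec() at flush time, which also lets gen6 send blitter batches to the BLT ring (I915_EXEC_BLT) instead of refusing them. The following is a minimal, self-contained sketch of that accumulate-then-flush pattern; the toy_* names and the printf "submission" are illustrative stand-ins, not the driver's actual API.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the batch state this change embeds in
 * intel_context: commands accumulate in a plain dword array and are
 * handed to the kernel (here: a printf) only when the batch is flushed. */
#define BATCH_DWORDS 8192
#define BATCH_RESERVED_DWORDS 4

struct toy_batch {
   uint32_t map[BATCH_DWORDS];
   uint16_t used;      /* dwords written so far */
   bool is_blit;       /* which ring the current contents target */
};

static void toy_flush(struct toy_batch *batch)
{
   if (batch->used == 0)
      return;
   /* The real code uploads map[] with drm_intel_bo_subdata() and submits
    * with drm_intel_bo_mrb_exec(), choosing I915_EXEC_BLT for blit batches
    * on gen6.  Here we just report what would be submitted. */
   printf("flush %u dwords to %s ring\n", batch->used,
          batch->is_blit ? "blit" : "render");
   batch->used = 0;
}

static void toy_require_space(struct toy_batch *batch, uint16_t dwords,
                              bool is_blit)
{
   /* Render and blit commands cannot share a batch on gen6, so a ring
    * change forces a flush, as does running out of room. */
   if (batch->is_blit != is_blit && batch->used)
      toy_flush(batch);
   batch->is_blit = is_blit;

   if (batch->used + dwords > BATCH_DWORDS - BATCH_RESERVED_DWORDS)
      toy_flush(batch);
}

static void toy_emit(struct toy_batch *batch, uint32_t dword)
{
   assert(batch->used < BATCH_DWORDS);
   batch->map[batch->used++] = dword;
}

int main(void)
{
   struct toy_batch batch = {0};

   toy_require_space(&batch, 2, false);   /* render-ring command */
   toy_emit(&batch, 1);                   /* dummy payload dwords */
   toy_emit(&batch, 2);

   toy_require_space(&batch, 4, true);    /* blit command: forces a flush */
   toy_emit(&batch, 3);
   toy_emit(&batch, 4);
   toy_emit(&batch, 5);
   toy_emit(&batch, 6);

   toy_flush(&batch);
   return 0;
}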
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 21fc9ece88..42b4f923e0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -27,81 +27,89 @@ #include "intel_context.h" #include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" #include "intel_decode.h" #include "intel_reg.h" #include "intel_bufmgr.h" #include "intel_buffers.h" -void -intel_batchbuffer_reset(struct intel_batchbuffer *batch) +struct cached_batch_item { + struct cached_batch_item *next; + uint16_t header; + uint16_t size; +}; + +static void clear_cache( struct intel_context *intel ) { - struct intel_context *intel = batch->intel; + struct cached_batch_item *item = intel->batch.cached_items; - if (batch->buf != NULL) { - drm_intel_bo_unreference(batch->buf); - batch->buf = NULL; + while (item) { + struct cached_batch_item *next = item->next; + free(item); + item = next; } - batch->buf = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096); - drm_intel_gem_bo_map_gtt(batch->buf); - batch->map = batch->buf->virtual; - - batch->size = intel->maxBatchSize; - batch->ptr = batch->map; - batch->reserved_space = BATCH_RESERVED; - batch->dirty_state = ~0; - batch->state_batch_offset = batch->size; + intel->batch.cached_items = NULL; } -struct intel_batchbuffer * -intel_batchbuffer_alloc(struct intel_context *intel) +void +intel_batchbuffer_reset(struct intel_context *intel) { - struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); + if (intel->batch.bo != NULL) { + drm_intel_bo_unreference(intel->batch.bo); + intel->batch.bo = NULL; + } + clear_cache(intel); - batch->intel = intel; - intel_batchbuffer_reset(batch); + intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", + intel->maxBatchSize, 4096); - return batch; + intel->batch.reserved_space = BATCH_RESERVED; + intel->batch.state_batch_offset = intel->batch.bo->size; + intel->batch.used = 0; } void -intel_batchbuffer_free(struct intel_batchbuffer *batch) +intel_batchbuffer_free(struct intel_context *intel) { - if (batch->map) { - drm_intel_gem_bo_unmap_gtt(batch->buf); - batch->map = NULL; - } - dri_bo_unreference(batch->buf); - batch->buf = NULL; - free(batch); + drm_intel_bo_unreference(intel->batch.bo); + clear_cache(intel); } - /* TODO: Push this whole function into bufmgr. 
*/ static void -do_flush_locked(struct intel_batchbuffer *batch, GLuint used) +do_flush_locked(struct intel_context *intel) { - struct intel_context *intel = batch->intel; + struct intel_batchbuffer *batch = &intel->batch; int ret = 0; - int x_off = 0, y_off = 0; - - drm_intel_gem_bo_unmap_gtt(batch->buf); - - batch->ptr = NULL; if (!intel->intelScreen->no_hw) { - drm_intel_bo_exec(batch->buf, used, NULL, 0, - (x_off & 0xffff) | (y_off << 16)); + int ring; + + if (intel->gen < 6 || !batch->is_blit) { + ring = I915_EXEC_RENDER; + } else { + ring = I915_EXEC_BLT; + } + + ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); + if (ret == 0 && batch->state_batch_offset != batch->bo->size) { + ret = drm_intel_bo_subdata(batch->bo, + batch->state_batch_offset, + batch->bo->size - batch->state_batch_offset, + (char *)batch->map + batch->state_batch_offset); + } + + if (ret == 0) + ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring); } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - drm_intel_bo_map(batch->buf, GL_FALSE); - intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, + intel_decode(batch->map, batch->used, + batch->bo->offset, intel->intelScreen->deviceID, GL_TRUE); - drm_intel_bo_unmap(batch->buf); if (intel->vtbl.debug_batch != NULL) intel->vtbl.debug_batch(intel); @@ -114,80 +122,54 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) } void -_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, - int line) +_intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line) { - struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - batch->map; - - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch->buf; - drm_intel_bo_reference(intel->first_post_swapbuffers_batch); - } - - if (used == 0) + if (intel->batch.used == 0) return; if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, - used); + 4*intel->batch.used); - batch->reserved_space = 0; + intel->batch.reserved_space = 0; if (intel->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(batch); - used = batch->ptr - batch->map; - } - - /* Round batchbuffer usage to 2 DWORDs. */ - - if ((used & 4) == 0) { - *(GLuint *) (batch->ptr) = 0; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; + intel_batchbuffer_emit_mi_flush(intel); } /* Mark the end of the buffer. */ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; - batch->ptr += 4; - used = batch->ptr - batch->map; - assert (used <= batch->buf->size); - - /* Workaround for recursive batchbuffer flushing: If the window is - * moved, we can get into a case where we try to flush during a - * flush. What happens is that when we try to grab the lock for - * the first flush, we detect that the window moved which then - * causes another flush (from the intel_draw_buffer() call in - * intelUpdatePageFlipping()). To work around this we reset the - * batchbuffer tail pointer before trying to get the lock. This - * prevent the nested buffer flush, but a better fix would be to - * avoid that in the first place. */ - batch->ptr = batch->map; + intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END); + if (intel->batch.used & 1) { + /* Round batchbuffer usage to 2 DWORDs. 
*/ + intel_batchbuffer_emit_dword(intel, MI_NOOP); + } if (intel->vtbl.finish_batch) intel->vtbl.finish_batch(intel); + intel_upload_finish(intel); + /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); - do_flush_locked(batch, used); + do_flush_locked(intel); if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { fprintf(stderr, "waiting for idle\n"); - drm_intel_bo_map(batch->buf, GL_TRUE); - drm_intel_bo_unmap(batch->buf); + drm_intel_bo_wait_rendering(intel->batch.bo); } /* Reset the buffer: */ - intel_batchbuffer_reset(batch); + intel_batchbuffer_reset(intel); } /* This is the only way buffers get added to the validate list. */ GLboolean -intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, +intel_batchbuffer_emit_reloc(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t delta) @@ -196,58 +178,98 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, assert(delta < buffer->size); - if (batch->ptr - batch->map > batch->buf->size) - printf ("bad relocation ptr %p map %p offset %d size %lu\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = drm_intel_bo_emit_reloc(batch->buf, batch->ptr - batch->map, + ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used, buffer, delta, read_domains, write_domain); + assert(ret == 0); + (void)ret; /* * Using the old buffer offset, write in what the right data would be, in case * the buffer doesn't move and we can short-circuit the relocation processing * in the kernel */ - intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); return GL_TRUE; } GLboolean -intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, +intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, drm_intel_bo *buffer, - uint32_t read_domains, uint32_t write_domain, + uint32_t read_domains, + uint32_t write_domain, uint32_t delta) { int ret; assert(delta < buffer->size); - if (batch->ptr - batch->map > batch->buf->size) - printf ("bad relocation ptr %p map %p offset %d size %lu\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map, + ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used, buffer, delta, read_domains, write_domain); + assert(ret == 0); + (void)ret; /* * Using the old buffer offset, write in what the right data would * be, in case the buffer doesn't move and we can short-circuit the * relocation processing in the kernel */ - intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); return GL_TRUE; } void -intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes) +intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes); - __memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; + intel_batchbuffer_require_space(intel, bytes, is_blit); + __memcpy(intel->batch.map + intel->batch.used, data, bytes); + intel->batch.used += bytes >> 2; +} + +void +intel_batchbuffer_cached_advance(struct intel_context *intel) +{ + struct cached_batch_item **prev = &intel->batch.cached_items, *item; + uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t); + uint32_t *start = intel->batch.map + intel->batch.emit; + 
uint16_t op = *start >> 16; + + while (*prev) { + uint32_t *old; + + item = *prev; + old = intel->batch.map + item->header; + if (op == *old >> 16) { + if (item->size == sz && memcmp(old, start, sz) == 0) { + if (prev != &intel->batch.cached_items) { + *prev = item->next; + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + } + intel->batch.used = intel->batch.emit; + return; + } + + goto emit; + } + prev = &item->next; + } + + item = malloc(sizeof(struct cached_batch_item)); + if (item == NULL) + return; + + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + +emit: + item->size = sz; + item->header = intel->batch.emit; } /* Emit a pipelined flush to either flush render and texture cache for @@ -257,27 +279,35 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch, * This is also used for the always_flush_cache driconf debug option. */ void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +intel_batchbuffer_emit_mi_flush(struct intel_context *intel) { - struct intel_context *intel = batch->intel; - if (intel->gen >= 6) { - BEGIN_BATCH(8); - - /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_NO_WRITE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + if (intel->batch.is_blit) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(8); + /* XXX workaround: issue any post sync != 0 before write + * cache flush = 1 + */ + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } } else if (intel->gen >= 4) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 428c027c2f..a0a5c9841c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -7,70 +7,37 @@ #include "intel_bufmgr.h" #include "intel_reg.h" -#define BATCH_SZ 16384 #define BATCH_RESERVED 16 +void intel_batchbuffer_reset(struct intel_context *intel); +void intel_batchbuffer_free(struct intel_context *intel); -struct intel_batchbuffer -{ - struct intel_context *intel; - - drm_intel_bo *buf; - - GLubyte *map; - GLubyte *ptr; - - GLuint size; - uint32_t state_batch_offset; - -#ifdef DEBUG - /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ - struct { - GLuint total; - GLubyte *start_ptr; - } emit; -#endif - - GLuint dirty_state; - GLuint reserved_space; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context - *intel); - -void intel_batchbuffer_free(struct intel_batchbuffer *batch); - - -void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, +void _intel_batchbuffer_flush(struct intel_context *intel, const char *file, int line); -#define 
intel_batchbuffer_flush(batch) \ - _intel_batchbuffer_flush(batch, __FILE__, __LINE__) +#define intel_batchbuffer_flush(intel) \ + _intel_batchbuffer_flush(intel, __FILE__, __LINE__) -void intel_batchbuffer_reset(struct intel_batchbuffer *batch); /* Unlike bmBufferData, this currently requires the buffer be mapped. * Consider it a convenience function wrapping multple * intel_buffer_dword() calls. */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); +void intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit); -GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, +GLboolean intel_batchbuffer_emit_reloc(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t offset); -GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, +GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t offset); -void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); +void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); static INLINE uint32_t float_as_int(float f) { @@ -89,83 +56,93 @@ static INLINE uint32_t float_as_int(float f) * work... */ static INLINE GLint -intel_batchbuffer_space(struct intel_batchbuffer *batch) +intel_batchbuffer_space(struct intel_context *intel) { - return (batch->state_batch_offset - batch->reserved_space) - - (batch->ptr - batch->map); + return (intel->batch.state_batch_offset - intel->batch.reserved_space) - intel->batch.used*4; } static INLINE void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) +intel_batchbuffer_emit_dword(struct intel_context *intel, GLuint dword) { #ifdef DEBUG - assert(intel_batchbuffer_space(batch) >= 4); + assert(intel_batchbuffer_space(intel) >= 4); #endif - *(GLuint *) (batch->ptr) = dword; - batch->ptr += 4; + intel->batch.map[intel->batch.used++] = dword; } static INLINE void -intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) +intel_batchbuffer_emit_float(struct intel_context *intel, float f) { - intel_batchbuffer_emit_dword(batch, float_as_int(f)); + intel_batchbuffer_emit_dword(intel, float_as_int(f)); } static INLINE void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz) +intel_batchbuffer_require_space(struct intel_context *intel, + GLuint sz, int is_blit) { + + if (intel->gen >= 6 && + intel->batch.is_blit != is_blit && intel->batch.used) { + intel_batchbuffer_flush(intel); + } + + intel->batch.is_blit = is_blit; + #ifdef DEBUG - assert(sz < batch->size - 8); + assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED); #endif - if (intel_batchbuffer_space(batch) < sz) - intel_batchbuffer_flush(batch); + if (intel_batchbuffer_space(intel) < sz) + intel_batchbuffer_flush(intel); } static INLINE void -intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) +intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit) { - intel_batchbuffer_require_space(batch, n * 4); + intel_batchbuffer_require_space(intel, n * 4, is_blit); + + intel->batch.emit = intel->batch.used; #ifdef DEBUG - assert(batch->map); - assert(batch->emit.start_ptr == NULL); - batch->emit.total = n * 4; - batch->emit.start_ptr = batch->ptr; + intel->batch.total = n; #endif } static INLINE void 
-intel_batchbuffer_advance(struct intel_batchbuffer *batch) +intel_batchbuffer_advance(struct intel_context *intel) { #ifdef DEBUG - unsigned int _n = batch->ptr - batch->emit.start_ptr; - assert(batch->emit.start_ptr != NULL); - if (_n != batch->emit.total) { + struct intel_batchbuffer *batch = &intel->batch; + unsigned int _n = batch->used - batch->emit; + assert(batch->total != 0); + if (_n != batch->total) { fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", - _n, batch->emit.total); + _n, batch->total); abort(); } - batch->emit.start_ptr = NULL; + batch->total = 0; #endif } +void intel_batchbuffer_cached_advance(struct intel_context *intel); + /* Here are the crusty old macros, to be removed: */ #define BATCH_LOCALS -#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel, n, true) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel, d) +#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ + intel_batchbuffer_emit_reloc(intel, buf, \ read_domains, write_domain, delta); \ } while (0) #define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc_fenced(intel->batch, buf, \ + intel_batchbuffer_emit_reloc_fenced(intel, buf, \ read_domains, write_domain, delta); \ } while (0) -#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch); +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel); +#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel); #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index c2917e9b07..e1ab7f1637 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -38,6 +38,7 @@ #include "intel_reg.h" #include "intel_regions.h" #include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -107,10 +108,6 @@ intelEmitCopyBlit(struct intel_context *intel, drm_intel_bo *aper_array[3]; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. 
*/ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -126,12 +123,12 @@ intelEmitCopyBlit(struct intel_context *intel, /* do space check before going any further */ do { - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = dst_buffer; aper_array[2] = src_buffer; if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); pass++; } else break; @@ -140,7 +137,7 @@ intelEmitCopyBlit(struct intel_context *intel, if (pass >= 2) return GL_FALSE; - intel_batchbuffer_require_space(intel->batch, 8 * 4); + intel_batchbuffer_require_space(intel, 8 * 4, true); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -181,7 +178,7 @@ intelEmitCopyBlit(struct intel_context *intel, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - BEGIN_BATCH(8); + BEGIN_BATCH_BLT(8); OUT_BATCH(CMD); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); @@ -196,7 +193,7 @@ intelEmitCopyBlit(struct intel_context *intel, src_offset); ADVANCE_BATCH(); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); return GL_TRUE; } @@ -209,7 +206,7 @@ intelEmitCopyBlit(struct intel_context *intel, * which we're clearing with triangles. * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear */ -void +GLbitfield intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); @@ -217,11 +214,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) GLuint clear_depth; GLboolean all; GLint cx, cy, cw, ch; + GLbitfield fail_mask = 0; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* * Compute values for clearing the buffers. 
*/ @@ -242,7 +237,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) ch = fb->_Ymax - fb->_Ymin; if (cw == 0 || ch == 0) - return; + return 0; GLuint buf; all = (cw == fb->Width && ch == fb->Height); @@ -338,9 +333,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) clear[3], clear[3]); break; default: - _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->Base.Format); - clear_val = 0; + fail_mask |= bufBit; + mask &= ~bufBit; + continue; } } @@ -348,15 +343,15 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) assert(y1 < y2); /* do space check before going any further */ - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = write_buffer; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); } - BEGIN_BATCH(6); + BEGIN_BATCH_BLT(6); OUT_BATCH(CMD); OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); @@ -368,13 +363,15 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) ADVANCE_BATCH(); if (intel->always_flush_cache) - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); else mask &= ~bufBit; /* turn off bit, for faster loop exit */ } + + return fail_mask; } GLboolean @@ -393,10 +390,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -417,10 +410,10 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - intel_batchbuffer_require_space( intel->batch, - (8 * 4) + - (3 * 4) + - dwords * 4 ); + intel_batchbuffer_require_space(intel, + (8 * 4) + + (3 * 4) + + dwords * 4, true); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -439,7 +432,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3); + BEGIN_BATCH_BLT(8 + 3); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ @@ -456,11 +449,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(((y + h) << 16) | (x + w)); ADVANCE_BATCH(); - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4 ); + intel_batchbuffer_data(intel, src_bits, dwords * 4, true); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); return GL_TRUE; } @@ -480,9 +471,6 @@ intel_emit_linear_blit(struct intel_context *intel, GLuint pitch, height; GLboolean ok; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* The pitch given to the GPU must be DWORD aligned, and * we want width to match pitch. Max width is (1 << 15 - 1), * rounding that down to the nearest DWORD is 1 << 15 - 4 @@ -514,3 +502,81 @@ intel_emit_linear_blit(struct intel_context *intel, assert(ok); } } + +/** + * Used to initialize the alpha value of an ARGB8888 teximage after + * loading it from an XRGB8888 source. + * + * This is very common with glCopyTexImage2D(). 
+ */ +void +intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image) +{ + struct intel_context *intel = intel_context(ctx); + unsigned int image_x, image_y; + uint32_t x1, y1, x2, y2; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + struct intel_region *region = intel_image->mt->region; + BATCH_LOCALS; + + assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888); + + /* get dest x/y in destination texture */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + 0, + &image_x, &image_y); + + x1 = image_x; + y1 = image_y; + x2 = image_x + intel_image->base.Width; + y2 = image_y + intel_image->base.Height; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + intel_image->mt->region->buffer, (pitch * cpp), + x1, y1, x2 - x1, y2 - y1); + + BR13 = br13_for_cpp(cpp) | 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + CMD |= XY_BLT_WRITE_ALPHA; + + assert(region->tiling != I915_TILING_Y); + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= (pitch * cpp); + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->buffer; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + BEGIN_BATCH_BLT(6); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC_FENCED(region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel); +} diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 0163146573..88322c7b49 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -33,7 +33,7 @@ extern void intelCopyBuffer(const __DRIdrawable * dpriv, const drm_clip_rect_t * rect); -extern void intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); +extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); GLboolean intelEmitCopyBlit(struct intel_context *intel, @@ -69,5 +69,7 @@ void intel_emit_linear_blit(struct intel_context *intel, drm_intel_bo *src_bo, unsigned int src_offset, unsigned int size); +void intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image); #endif diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 87da60a771..439d6fc824 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -27,6 +27,7 @@ #include "main/imports.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" @@ -52,6 +53,15 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, intel_obj->Base.Size, 64); } +static void +release_buffer(struct intel_buffer_object *intel_obj) +{ + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + intel_obj->offset = 0; + intel_obj->source = 0; +} + /** * There is some duplication between mesa's bufferobjects and our * bufmgr buffers. 
Both have an integer handle and a hashtable to @@ -80,8 +90,7 @@ intel_bufferobj_release_region(struct intel_context *intel, intel_obj->region->pbo = NULL; intel_obj->region = NULL; - drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; + release_buffer(intel_obj); } /* Break the COW tie to the region. Both the pbo and the region end @@ -119,10 +128,8 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); } - else if (intel_obj->buffer) { - drm_intel_bo_unreference(intel_obj->buffer); - } + drm_intel_bo_unreference(intel_obj->buffer); free(intel_obj); } @@ -153,19 +160,22 @@ intel_bufferobj_data(struct gl_context * ctx, if (intel_obj->region) intel_bufferobj_release_region(intel, intel_obj); - if (intel_obj->buffer != NULL) { - drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; - } + if (intel_obj->buffer != NULL) + release_buffer(intel_obj); + free(intel_obj->sys_buffer); intel_obj->sys_buffer = NULL; if (size != 0) { + if (usage == GL_DYNAMIC_DRAW #ifdef I915 - /* On pre-965, stick VBOs in system memory, as we're always doing swtnl - * with their contents anyway. - */ - if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + /* On pre-965, stick VBOs in system memory, as we're always doing + * swtnl with their contents anyway. + */ + || target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER +#endif + ) + { intel_obj->sys_buffer = malloc(size); if (intel_obj->sys_buffer != NULL) { if (data != NULL) @@ -173,7 +183,6 @@ intel_bufferobj_data(struct gl_context * ctx, return GL_TRUE; } } -#endif intel_bufferobj_alloc_buffer(intel, intel_obj); if (!intel_obj->buffer) return GL_FALSE; @@ -201,6 +210,7 @@ intel_bufferobj_subdata(struct gl_context * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + bool busy; if (size == 0) return; @@ -210,34 +220,53 @@ intel_bufferobj_subdata(struct gl_context * ctx, if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); - if (intel_obj->sys_buffer) - memcpy((char *)intel_obj->sys_buffer + offset, data, size); - else { - /* Flush any existing batchbuffer that might reference this data. */ - if (intel->gen < 6) { - if (drm_intel_bo_busy(intel_obj->buffer) || - drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) { - drm_intel_bo *temp_bo; + /* If we have a single copy in system memory, update that */ + if (intel_obj->sys_buffer) { + if (intel_obj->source) + release_buffer(intel_obj); - temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + if (intel_obj->buffer == NULL) { + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + return; + } - drm_intel_bo_subdata(temp_bo, 0, size, data); + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } - intel_emit_linear_blit(intel, - intel_obj->buffer, offset, - temp_bo, 0, - size); + /* Otherwise we need to update the copy in video memory. 
*/ + busy = + drm_intel_bo_busy(intel_obj->buffer) || + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer); - drm_intel_bo_unreference(temp_bo); - } else { - drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); - } + /* replace the current busy bo with fresh data */ + if (busy && size == intel_obj->Base.Size) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } else if (intel->gen < 6) { + if (busy) { + drm_intel_bo *temp_bo; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + + drm_intel_bo_subdata(temp_bo, 0, size, data); + + intel_emit_linear_blit(intel, + intel_obj->buffer, offset, + temp_bo, 0, + size); + + drm_intel_bo_unreference(temp_bo); } else { - if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) { - intel_batchbuffer_flush(intel->batch); - } drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } + } else { + /* Can't use the blit to modify the buffer in the middle of batch. */ + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } + drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } } @@ -279,14 +308,23 @@ intel_bufferobj_map(struct gl_context * ctx, assert(intel_obj); if (intel_obj->sys_buffer) { - obj->Pointer = intel_obj->sys_buffer; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; + if (!read_only && intel_obj->source) { + release_buffer(intel_obj); + } + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer; + obj->Length = obj->Size; + obj->Offset = 0; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; } /* Flush any existing batchbuffer that might reference this data. */ - if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) intel_flush(ctx); if (intel_obj->region) @@ -335,6 +373,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + GLboolean read_only = (access == GL_READ_ONLY_ARB); assert(intel_obj); @@ -346,8 +385,16 @@ intel_bufferobj_map_range(struct gl_context * ctx, obj->AccessFlags = access; if (intel_obj->sys_buffer) { - obj->Pointer = intel_obj->sys_buffer + offset; - return obj->Pointer; + if (!read_only && intel_obj->source) + release_buffer(intel_obj); + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; } if (intel_obj->region) @@ -358,7 +405,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, * syncing. 
*/ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && - drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) intel_flush(ctx); if (intel_obj->buffer == NULL) { @@ -373,8 +420,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, (access & GL_MAP_INVALIDATE_BUFFER_BIT) && drm_intel_bo_busy(intel_obj->buffer)) { drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64); + intel_bufferobj_alloc_buffer(intel, intel_obj); } /* If the user is mapping a range of an active buffer object but @@ -472,7 +518,7 @@ intel_bufferobj_unmap(struct gl_context * ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); free(intel_obj->range_map_buffer); intel_obj->range_map_buffer = NULL; } else if (intel_obj->range_map_bo != NULL) { @@ -492,7 +538,7 @@ intel_bufferobj_unmap(struct gl_context * ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); drm_intel_bo_unreference(intel_obj->range_map_bo); intel_obj->range_map_bo = NULL; @@ -512,7 +558,8 @@ intel_bufferobj_unmap(struct gl_context * ctx, drm_intel_bo * intel_bufferobj_buffer(struct intel_context *intel, - struct intel_buffer_object *intel_obj, GLuint flag) + struct intel_buffer_object *intel_obj, + GLuint flag) { if (intel_obj->region) { if (flag == INTEL_WRITE_PART) @@ -523,23 +570,169 @@ intel_bufferobj_buffer(struct intel_context *intel, } } - if (intel_obj->buffer == NULL) { - void *sys_buffer = intel_obj->sys_buffer; + if (intel_obj->source) + release_buffer(intel_obj); - /* only one of buffer and sys_buffer could be non-NULL */ + if (intel_obj->buffer == NULL) { intel_bufferobj_alloc_buffer(intel, intel_obj); - intel_obj->sys_buffer = NULL; + drm_intel_bo_subdata(intel_obj->buffer, + 0, intel_obj->Base.Size, + intel_obj->sys_buffer); - intel_bufferobj_subdata(&intel->ctx, - GL_ARRAY_BUFFER_ARB, - 0, - intel_obj->Base.Size, - sys_buffer, - &intel_obj->Base); - free(sys_buffer); + free(intel_obj->sys_buffer); intel_obj->sys_buffer = NULL; + intel_obj->offset = 0; + } + + return intel_obj->buffer; +} + +#define INTEL_UPLOAD_SIZE (64*1024) + +void +intel_upload_finish(struct intel_context *intel) +{ + if (!intel->upload.bo) + return; + + if (intel->upload.buffer_len) { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + drm_intel_bo_unreference(intel->upload.bo); + intel->upload.bo = NULL; +} + +static void wrap_buffers(struct intel_context *intel, GLuint size) +{ + intel_upload_finish(intel); + + if (size < INTEL_UPLOAD_SIZE) + size = INTEL_UPLOAD_SIZE; + + intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); + intel->upload.offset = 0; +} + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base, delta; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + 
*return_offset = base; + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size < sizeof(intel->upload.buffer)) + { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size); + intel->upload.buffer_len += size; + } + else + { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + } + + intel->upload.offset = base + size; +} + +void *intel_upload_map(struct intel_context *intel, GLuint size, GLuint align) +{ + GLuint base, delta; + char *ptr; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size <= sizeof(intel->upload.buffer)) { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + ptr = intel->upload.buffer + intel->upload.buffer_len; + intel->upload.buffer_len += size; + } else + ptr = malloc(size); + + return ptr; +} + +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base; + + base = (intel->upload.offset + align - 1) / align * align; + if (size > sizeof(intel->upload.buffer)) { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + free((void*)ptr); + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + intel->upload.offset = base + size; +} + +drm_intel_bo * +intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint align, GLuint *offset) +{ + if (intel_obj->buffer == NULL) { + intel_upload_data(intel, + intel_obj->sys_buffer, intel_obj->Base.Size, align, + &intel_obj->buffer, &intel_obj->offset); + intel_obj->source = 1; } + *offset = intel_obj->offset; return intel_obj->buffer; } @@ -554,6 +747,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, struct intel_buffer_object *intel_src = intel_buffer_object(src); struct intel_buffer_object *intel_dst = intel_buffer_object(dst); drm_intel_bo *src_bo, *dst_bo; + GLuint src_offset; if (size == 0) return; @@ -566,7 +760,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, if (src == dst) { char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, GL_READ_WRITE, dst); - memcpy(ptr + write_offset, ptr + read_offset, size); + memmove(ptr + write_offset, ptr + read_offset, size); intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); } else { const char *src_ptr; @@ -588,18 +782,18 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, /* Otherwise, we have real BOs, so blit them. 
*/ dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); - src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ); + src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset); intel_emit_linear_blit(intel, dst_bo, write_offset, - src_bo, read_offset, size); + src_bo, read_offset + src_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } #if FEATURE_APPLE_object_purgeable diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index b15c192106..81ee21f062 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -42,6 +42,8 @@ struct intel_buffer_object { struct gl_buffer_object Base; drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */ + GLuint offset; /* any offset into that buffer */ + /** System memory buffer data, if not using a BO to store the data. */ void *sys_buffer; @@ -55,6 +57,7 @@ struct intel_buffer_object GLsizei range_map_size; GLboolean mapped_gtt; + GLboolean source; }; @@ -63,8 +66,26 @@ struct intel_buffer_object drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel, struct intel_buffer_object *obj, GLuint flag); +drm_intel_bo *intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint align, + GLuint *offset); + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void *intel_upload_map(struct intel_context *intel, + GLuint size, GLuint align); +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void intel_upload_finish(struct intel_context *intel); -/* Hook the bufferobject implementation into mesa: +/* Hook the bufferobject implementation into mesa: */ void intelInitBufferObjectFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 4fecdbed20..4ff9140d56 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -133,6 +133,10 @@ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_S) +#define IS_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + #define IS_965(devid) (IS_GEN4(devid) || \ IS_G4X(devid) || \ IS_GEN5(devid) || \ diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index fa451f0045..82d29e7671 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -85,6 +85,8 @@ intelClear(struct gl_context *ctx, GLbitfield mask) GLbitfield blit_mask = 0; GLbitfield swrast_mask = 0; struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irb; + int i; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { intel->front_buffer_dirty = GL_TRUE; @@ -93,6 +95,22 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (0) fprintf(stderr, "%s\n", __FUNCTION__); + /* Get SW clears out of the way: 
Anything without an intel_renderbuffer */ + for (i = 0; i < BUFFER_COUNT; i++) { + if (!(mask & (1 << i))) + continue; + + irb = intel_get_renderbuffer(fb, i); + if (unlikely(!irb)) { + swrast_mask |= (1 << i); + mask &= ~(1 << i); + } + } + if (unlikely(swrast_mask)) { + debug_mask("swrast", swrast_mask); + _swrast_Clear(ctx, swrast_mask); + } + /* HW color buffers (front, back, aux, generic FBO, etc) */ if (colorMask == ~0) { /* clear all R,G,B,A */ @@ -151,44 +169,18 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } } - if (intel->gen >= 6) { - /* Blits are in a different ringbuffer so we don't use them. */ - tri_mask |= blit_mask; - blit_mask = 0; - } - - /* SW fallback clearing */ - swrast_mask = mask & ~tri_mask & ~blit_mask; - - { - /* look for non-Intel renderbuffers (clear them with swrast) */ - GLbitfield blit_or_tri = blit_mask | tri_mask; - while (blit_or_tri) { - GLuint i = _mesa_ffs(blit_or_tri) - 1; - GLbitfield bufBit = 1 << i; - if (!fb->Attachment[i].Renderbuffer->ClassID) { - blit_mask &= ~bufBit; - tri_mask &= ~bufBit; - swrast_mask |= bufBit; - } - blit_or_tri ^= bufBit; - } - } + /* Anything left, just use tris */ + tri_mask |= mask & ~blit_mask; if (blit_mask) { debug_mask("blit", blit_mask); - intelClearWithBlit(ctx, blit_mask); + tri_mask |= intelClearWithBlit(ctx, blit_mask); } if (tri_mask) { debug_mask("tri", tri_mask); _mesa_meta_Clear(&intel->ctx, tri_mask); } - - if (swrast_mask) { - debug_mask("swrast", swrast_mask); - _swrast_Clear(ctx, swrast_mask); - } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 968f3c6e46..7c422c4c3a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -496,11 +496,9 @@ intel_prepare_render(struct intel_context *intel) * the swap, and getting our hands on that doesn't seem worth it, * so we just us the first batch we emitted after the last swap. 
*/ - if (intel->need_throttle && intel->first_post_swapbuffers_batch) { - drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; - intel->need_throttle = GL_FALSE; + if (intel->need_throttle) { + drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); + intel->need_throttle = GL_FALSE; } } @@ -513,7 +511,7 @@ intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (intel->saved_viewport) intel->saved_viewport(ctx, x, y, w, h); - if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { + if (ctx->DrawBuffer->Name == 0) { dri2InvalidateDrawable(driContext->driDrawablePriv); dri2InvalidateDrawable(driContext->driReadablePriv); } @@ -580,8 +578,8 @@ intel_flush(struct gl_context *ctx) if (intel->gen < 4) INTEL_FIREVERTICES(intel); - if (intel->batch->map != intel->batch->ptr) - intel_batchbuffer_flush(intel->batch); + if (intel->batch.used) + intel_batchbuffer_flush(intel); } static void @@ -591,7 +589,8 @@ intel_glFlush(struct gl_context *ctx) intel_flush(ctx); intel_flush_front(ctx); - intel->need_throttle = GL_TRUE; + if (intel->is_front_buffer_rendering) + intel->need_throttle = GL_TRUE; } void @@ -608,7 +607,7 @@ intelFinish(struct gl_context * ctx) irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - if (irb && irb->region) + if (irb && irb->region && irb->region->buffer) drm_intel_bo_wait_rendering(irb->region->buffer); } if (fb->_DepthBuffer) { @@ -669,8 +668,8 @@ intelInitContext(struct intel_context *intel, mesaVis = &visual; } - if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx, - functions, (void *) intel)) { + if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx, + functions, (void *) intel)) { printf("%s: failed to init mesa context\n", __FUNCTION__); return GL_FALSE; } @@ -708,12 +707,75 @@ intelInitContext(struct intel_context *intel, } } + memset(&ctx->TextureFormatSupported, 0, + sizeof(ctx->TextureFormatSupported)); + ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE; + if (intel->has_xrgb_textures) + ctx->TextureFormatSupported[MESA_FORMAT_XRGB8888] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB4444] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB1555] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGB565] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_L8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_A8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_I8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE; + if (intel->gen >= 4) + ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; + /* + * This was disabled in initial FBO enabling to avoid combinations + * of depth+stencil that wouldn't work together. We since decided + * that it was OK, since it's up to the app to come up with the + * combo that actually works, so this can probably be re-enabled. 
+ */ + /* + ctx->TextureFormatSupported[MESA_FORMAT_Z16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_Z24] = GL_TRUE; + */ + + /* ctx->Extensions.MESA_ycbcr_texture */ + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR_REV] = GL_TRUE; + + /* GL_3DFX_texture_compression_FXT1 */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_FXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_FXT1] = GL_TRUE; + + /* GL_EXT_texture_compression_s3tc */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE; + +#ifndef I915 + /* GL_ARB_texture_rg */ + ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG88] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG1616] = GL_TRUE; + + ctx->TextureFormatSupported[MESA_FORMAT_DUDV8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RGBA8888_REV] = GL_TRUE; + + /* GL_EXT_texture_sRGB */ + ctx->TextureFormatSupported[MESA_FORMAT_SARGB8] = GL_TRUE; + if (intel->gen >= 5 || intel->is_g4x) + ctx->TextureFormatSupported[MESA_FORMAT_SRGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT5] = GL_TRUE; + if (intel->has_luminance_srgb) { + ctx->TextureFormatSupported[MESA_FORMAT_SL8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SLA8] = GL_TRUE; + } +#endif + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, sPriv->myNum, (intel->gen >= 4) ? 
"i965" : "i915"); - if (intelScreen->deviceID == PCI_CHIP_I865_G) + if (intel->gen < 4) intel->maxBatchSize = 4096; else - intel->maxBatchSize = BATCH_SZ; + intel->maxBatchSize = sizeof(intel->batch.map); intel->bufmgr = intelScreen->bufmgr; @@ -767,8 +829,8 @@ intelInitContext(struct intel_context *intel, */ _mesa_init_point(ctx); - meta_init_metaops(ctx, &intel->meta); if (intel->gen >= 4) { + ctx->Const.sRGBCapable = GL_TRUE; if (MAX_WIDTH > 8192) ctx->Const.MaxRenderbufferSize = 8192; } else { @@ -826,7 +888,7 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - intel->batch = intel_batchbuffer_alloc(intel); + intel_batchbuffer_reset(intel); intel_fbo_init(intel); @@ -874,8 +936,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) _mesa_meta_free(&intel->ctx); - meta_destroy_metaops(&intel->meta); - intel->vtbl.destroy(intel); _swsetup_DestroyContext(&intel->ctx); @@ -885,15 +945,12 @@ intelDestroyContext(__DRIcontext * driContextPriv) _swrast_DestroyContext(&intel->ctx); intel->Fallback = 0x0; /* don't call _swrast_Flush later */ - intel_batchbuffer_free(intel->batch); - intel->batch = NULL; + intel_batchbuffer_free(intel); free(intel->prim.vb); intel->prim.vb = NULL; drm_intel_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; driDestroyOptionCache(&intel->optionCache); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 96493c0f2b..772b2fba5a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -29,10 +29,9 @@ #define INTELCONTEXT_INC - +#include <stdbool.h> #include "main/mtypes.h" #include "main/mm.h" -#include "dri_metaops.h" #ifdef __cplusplus extern "C" { @@ -149,10 +148,9 @@ struct intel_context void (*assert_not_dirty) (struct intel_context *intel); void (*debug_batch)(struct intel_context *intel); + bool (*render_target_supported)(gl_format format); } vtbl; - struct dri_metaops meta; - GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ GLuint NewGLState; @@ -171,17 +169,28 @@ struct intel_context int urb_size; - struct intel_batchbuffer *batch; - drm_intel_bo *first_post_swapbuffers_batch; + struct intel_batchbuffer { + drm_intel_bo *bo; + struct cached_batch_item *cached_items; + + uint16_t emit, total; + uint16_t used, reserved_space; + uint32_t map[8192]; +#define BATCH_SZ (8192*sizeof(uint32_t)) + + uint32_t state_batch_offset; + bool is_blit; + } batch; + GLboolean need_throttle; GLboolean no_batch_wrap; struct { GLuint id; + uint32_t start_ptr; /**< for i8xx */ uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); - GLubyte *start_ptr; /**< for i8xx */ drm_intel_bo *vb_bo; uint8_t *vb; unsigned int start_offset; /**< Byte offset of primitive sequence */ @@ -189,6 +198,14 @@ struct intel_context unsigned int count; /**< Number of vertices in current primitive */ } prim; + struct { + drm_intel_bo *bo; + GLuint offset; + uint32_t buffer_len; + uint32_t buffer_offset; + char buffer[4096]; + } upload; + GLuint stats_wm; /* Offsets of fields within the current vertex: diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 25b4131594..688b8fee64 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -1601,10 +1601,12 @@ 
decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, { 0x7805, 3, 3, "3DSTATE_URB" }, { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" }, { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 556a4195bd..febc1d4f85 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -25,12 +25,15 @@ * **************************************************************************/ +#include "main/mfeatures.h" + #include "intel_chipset.h" #include "intel_context.h" #include "intel_extensions.h" #include "utils.h" +#define need_GL_ARB_ES2_compatibility #define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_framebuffer_object #define need_GL_ARB_map_buffer_range @@ -78,8 +81,10 @@ * i965_dri. */ static const struct dri_extension card_extensions[] = { + { "GL_ARB_ES2_compatibility", GL_ARB_ES2_compatibility_functions }, { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_explicit_attrib_location", NULL }, + { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, @@ -161,7 +166,6 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, { "GL_ARB_fragment_shader", NULL }, - { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_vertex", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, @@ -171,9 +175,11 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_texture_non_power_of_two", NULL }, { "GL_ARB_texture_rg", NULL }, { "GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions }, + { "GL_EXT_framebuffer_sRGB", NULL }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, + { "GL_EXT_texture_sRGB_decode", NULL }, { "GL_EXT_texture_swizzle", NULL }, { "GL_EXT_vertex_array_bgra", NULL }, { "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions }, diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c index 54b0517deb..747ddf7644 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c +++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "main/extensions.h" +#include "main/mfeatures.h" #include "intel_extensions.h" @@ -62,6 +63,7 @@ static const char *es2_extensions[] = { "GL_EXT_blend_minmax", "GL_EXT_blend_subtract", "GL_EXT_stencil_wrap", + "GL_NV_blend_square", /* Optional GLES2 */ "GL_ARB_framebuffer_object", @@ -95,9 +97,22 @@ intelInitExtensionsES1(struct gl_context *ctx) _mesa_enable_extension(ctx, "GL_ARB_point_parameters"); _mesa_enable_extension(ctx, "GL_OES_draw_texture"); + + _mesa_enable_extension(ctx, 
"GL_OES_point_sprite"); } /** + * \brief Extensions to disable. + * + * These extensions must be manually disabled because they may have been + * enabled by default. + */ +static const char* es2_extensions_disabled[] = { + "GL_OES_standard_derivatives", + NULL, +}; + +/** * Initializes potential list of extensions if ctx == NULL, or actually enables * extensions for a context. */ @@ -111,4 +126,6 @@ intelInitExtensionsES2(struct gl_context *ctx) for (i = 0; es2_extensions[i]; i++) _mesa_enable_extension(ctx, es2_extensions[i]); + for (i = 0; es2_extensions_disabled[i]; i++) + _mesa_disable_extension(ctx, es2_extensions_disabled[i]); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 18e796a118..8b57eb19f5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -28,11 +28,13 @@ #include "main/imports.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/fbobject.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/context.h" +#include "main/teximage.h" #include "main/texrender.h" #include "drivers/common/meta.h" @@ -42,6 +44,11 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" +#include "intel_tex.h" +#include "intel_span.h" +#ifndef I915 +#include "brw_context.h" +#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -107,79 +114,27 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer ASSERT(rb->Name != 0); switch (internalFormat) { - case GL_RED: - case GL_R8: - rb->Format = MESA_FORMAT_R8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_R16: - rb->Format = MESA_FORMAT_R16; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_RG: - case GL_RG8: - rb->Format = MESA_FORMAT_RG88; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RG16: - rb->Format = MESA_FORMAT_RG1616; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_R3_G3_B2: - case GL_RGB4: - case GL_RGB5: - rb->Format = MESA_FORMAT_RGB565; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGB: - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - rb->Format = MESA_FORMAT_XRGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGBA: - case GL_RGBA2: - case GL_RGBA4: - case GL_RGB5_A1: - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - rb->Format = MESA_FORMAT_ARGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_ALPHA: - case GL_ALPHA8: - rb->Format = MESA_FORMAT_A8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_DEPTH_COMPONENT16: - rb->Format = MESA_FORMAT_Z16; - rb->DataType = GL_UNSIGNED_SHORT; + default: + /* Use the same format-choice logic as for textures. + * Renderbuffers aren't any different from textures for us, + * except they're less useful because you can't texture with + * them. + */ + rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, internalFormat, + GL_NONE, GL_NONE); break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - /* alloc a depth+stencil buffer */ + /* These aren't actual texture formats, so force them here. 
*/ rb->Format = MESA_FORMAT_S8_Z24; - rb->DataType = GL_UNSIGNED_INT_24_8_EXT; break; - default: - _mesa_problem(ctx, - "Unexpected format in intel_alloc_renderbuffer_storage"); - return GL_FALSE; } rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); cpp = _mesa_get_format_bytes(rb->Format); intel_flush(ctx); @@ -195,10 +150,15 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer DBG("Allocating %d x %d Intel RBO\n", width, height); tiling = I915_TILING_NONE; + if (intel->use_texture_tiling) { + GLenum base_format = _mesa_get_format_base_format(rb->Format); - /* Gen6 requires depth must be tiling */ - if (intel->gen >= 6 && rb->Format == MESA_FORMAT_S8_Z24) - tiling = I915_TILING_Y; + if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, width, height, GL_TRUE); @@ -334,53 +294,10 @@ intel_create_renderbuffer(gl_format format) _mesa_init_renderbuffer(&irb->Base, 0); irb->Base.ClassID = INTEL_RB_CLASS; - - switch (format) { - case MESA_FORMAT_RGB565: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_XRGB8888: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_ARGB8888: - irb->Base._BaseFormat = GL_RGBA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_Z16: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_SHORT; - break; - case MESA_FORMAT_X8_Z24: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_INT; - break; - case MESA_FORMAT_S8_Z24: - irb->Base._BaseFormat = GL_DEPTH_STENCIL; - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - break; - case MESA_FORMAT_A8: - irb->Base._BaseFormat = GL_ALPHA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_R8: - irb->Base._BaseFormat = GL_RED; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_RG88: - irb->Base._BaseFormat = GL_RG; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - default: - _mesa_problem(NULL, - "Unexpected intFormat in intel_create_renderbuffer"); - free(irb); - return NULL; - } - + irb->Base._BaseFormat = _mesa_get_format_base_format(format); irb->Base.Format = format; irb->Base.InternalFormat = irb->Base._BaseFormat; + irb->Base.DataType = intel_mesa_format_to_rb_datatype(format); /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; @@ -457,70 +374,16 @@ static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGBA8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to XGBA8 texture OK\n"); - } -#ifndef I915 - else if (texImage->TexFormat == MESA_FORMAT_SARGB8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to SARGB8 texture OK\n"); - } -#endif - else if (texImage->TexFormat == MESA_FORMAT_RGB565) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGB5 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB1555 texture OK\n"); - } - else if 
(texImage->TexFormat == MESA_FORMAT_ARGB4444) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB4444 texture OK\n"); - } -#ifndef I915 - else if (texImage->TexFormat == MESA_FORMAT_A8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to A8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG88) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RG88 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG1616) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to RG88 texture OK\n"); - } -#endif - else if (texImage->TexFormat == MESA_FORMAT_Z16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to DEPTH16 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) { - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - DBG("Render to DEPTH_STENCIL texture OK\n"); - } - else { + if (!intel_span_supports_format(texImage->TexFormat)) { DBG("Render to texture BAD FORMAT %s\n", _mesa_get_format_name(texImage->TexFormat)); return GL_FALSE; + } else { + DBG("Render to texture %s\n", _mesa_get_format_name(texImage->TexFormat)); } irb->Base.Format = texImage->TexFormat; - + irb->Base.DataType = intel_mesa_format_to_rb_datatype(texImage->TexFormat); irb->Base.InternalFormat = texImage->InternalFormat; irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat); irb->Base.Width = texImage->Width; @@ -562,6 +425,24 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) return irb; } +static void +intel_set_draw_offset_for_image(struct intel_texture_image *intel_image, + int zoffset) +{ + struct intel_mipmap_tree *mt = intel_image->mt; + unsigned int dst_x, dst_y; + + /* compute offset of the particular 2D image within the texture region */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + zoffset, + &dst_x, &dst_y); + + mt->region->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; + mt->region->draw_x = dst_x; + mt->region->draw_y = dst_y; +} /** * Called by glFramebufferTexture[123]DEXT() (and other places) to @@ -578,7 +459,6 @@ intel_render_texture(struct gl_context * ctx, = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); struct intel_texture_image *intel_image; - GLuint dst_x, dst_y; (void) fb; @@ -624,19 +504,53 @@ intel_render_texture(struct gl_context * ctx, intel_region_reference(&irb->region, intel_image->mt->region); } - /* compute offset of the particular 2D image within the texture region */ - intel_miptree_get_image_offset(intel_image->mt, - att->TextureLevel, - att->CubeMapFace, - att->Zoffset, - &dst_x, &dst_y); - - intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->region->pitch + - dst_x) * intel_image->mt->cpp; - intel_image->mt->region->draw_x = dst_x; - intel_image->mt->region->draw_y = dst_y; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); intel_image->used_as_render_target = GL_TRUE; +#ifndef I915 + if (!brw_context(ctx)->has_surface_tile_offset && + (intel_image->mt->region->draw_offset & 4095) != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your + 
* renderbuffer as a miptree and used the fragile + * lod/array_index/etc. controls to select the image. So, + * instead, we just make a new single-level miptree and render + * into that. + */ + struct intel_context *intel = intel_context(ctx); + struct intel_mipmap_tree *old_mt = intel_image->mt; + struct intel_mipmap_tree *new_mt; + int comp_byte = 0, texel_bytes; + + if (_mesa_is_format_compressed(intel_image->base.TexFormat)) + comp_byte = intel_compressed_num_bytes(intel_image->base.TexFormat); + + texel_bytes = _mesa_get_format_bytes(intel_image->base.TexFormat); + + new_mt = intel_miptree_create(intel, newImage->TexObject->Target, + intel_image->base._BaseFormat, + intel_image->base.InternalFormat, + intel_image->level, + intel_image->level, + intel_image->base.Width, + intel_image->base.Height, + intel_image->base.Depth, + texel_bytes, comp_byte, GL_TRUE); + + intel_miptree_image_copy(intel, + new_mt, + intel_image->face, + intel_image->level, + old_mt); + + intel_miptree_release(intel, &intel_image->mt); + intel_image->mt = new_mt; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); + + intel_region_release(&irb->region); + intel_region_reference(&irb->region, intel_image->mt->region); + } +#endif /* update drawing region, etc */ intel_draw_buffer(ctx, fb); } @@ -659,14 +573,15 @@ intel_finish_render_texture(struct gl_context * ctx, _glthread_GetID(), att->Texture->Name); /* Flag that this image may now be validated into the object's miptree. */ - intel_image->used_as_render_target = GL_FALSE; + if (intel_image) + intel_image->used_as_render_target = GL_FALSE; /* Since we've (probably) rendered to the texture and will (likely) use * it in the texture domain later on in this batchbuffer, flush the * batch. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer like GEM does in the kernel. 
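For reference, the offset arithmetic this path depends on: intel_set_draw_offset_for_image() turns the (x, y) position of one 2D image inside the miptree region into a byte offset, and the gen4 workaround above only triggers when that offset is not 4 KiB aligned. A hedged, self-contained sketch (demo_* names are illustrative; pitch is in pixels, as in the region code):

#include <stdbool.h>
#include <stdint.h>

static uint32_t demo_draw_offset(unsigned x, unsigned y,
                                 unsigned pitch_pixels, unsigned cpp)
{
   return (y * pitch_pixels + x) * cpp;
}

static bool demo_offset_is_page_aligned(uint32_t offset)
{
   return (offset & 4095) == 0;   /* 4 KiB granularity */
}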
*/ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } /** @@ -675,6 +590,7 @@ intel_finish_render_texture(struct gl_context * ctx, static void intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = @@ -682,10 +598,10 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) int i; if (depthRb && stencilRb && stencilRb != depthRb) { - if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && - (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name == - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) { + if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && + fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && + (fb->Attachment[BUFFER_DEPTH].Texture->Name == + fb->Attachment[BUFFER_STENCIL].Texture->Name)) { /* OK */ } else { /* we only support combined depth/stencil buffers, not separate @@ -698,40 +614,118 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) } } - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); + for (i = 0; i < Elements(fb->Attachment); i++) { + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; - if (rb == NULL) + if (fb->Attachment[i].Type == GL_NONE) continue; + /* A supported attachment will have a Renderbuffer set either + * from being a Renderbuffer or being a texture that got the + * intel_wrap_texture() treatment. + */ + rb = fb->Attachment[i].Renderbuffer; + if (rb == NULL) { + DBG("attachment without renderbuffer\n"); + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + + irb = intel_renderbuffer(rb); if (irb == NULL) { DBG("software rendering renderbuffer\n"); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; continue; } - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - case MESA_FORMAT_RGB565: - case MESA_FORMAT_ARGB1555: - case MESA_FORMAT_ARGB4444: -#ifndef I915 - case MESA_FORMAT_SARGB8: - case MESA_FORMAT_A8: - case MESA_FORMAT_R8: - case MESA_FORMAT_R16: - case MESA_FORMAT_RG88: - case MESA_FORMAT_RG1616: -#endif - break; - default: + if (!intel_span_supports_format(irb->Base.Format) || + !intel->vtbl.render_target_supported(irb->Base.Format)) { + DBG("Unsupported texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(irb->Base.Format)); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } } } +/** + * Try to do a glBlitFramebuffer using glCopyTexSubImage2D + * We can do this when the dst renderbuffer is actually a texture and + * there is no scaling, mirroring or scissoring. + * + * \return new buffer mask indicating the buffers left to blit using the + * normal path. 
+ */ +static GLbitfield +intel_blit_framebuffer_copy_tex_sub_image(struct gl_context *ctx, + GLint srcX0, GLint srcY0, + GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, + GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + if (mask & GL_COLOR_BUFFER_BIT) { + const struct gl_framebuffer *drawFb = ctx->DrawBuffer; + const struct gl_framebuffer *readFb = ctx->ReadBuffer; + const struct gl_renderbuffer_attachment *drawAtt = + &drawFb->Attachment[drawFb->_ColorDrawBufferIndexes[0]]; + + /* If the source and destination are the same size with no + mirroring, the rectangles are within the size of the + texture and there is no scissor then we can use + glCopyTexSubimage2D to implement the blit. This will end + up as a fast hardware blit on some drivers */ + if (drawAtt && drawAtt->Texture && + srcX0 - srcX1 == dstX0 - dstX1 && + srcY0 - srcY1 == dstY0 - dstY1 && + srcX1 >= srcX0 && + srcY1 >= srcY0 && + srcX0 >= 0 && srcX1 <= readFb->Width && + srcY0 >= 0 && srcY1 <= readFb->Height && + dstX0 >= 0 && dstX1 <= drawFb->Width && + dstY0 >= 0 && dstY1 <= drawFb->Height && + !ctx->Scissor.Enabled) { + const struct gl_texture_object *texObj = drawAtt->Texture; + const GLuint dstLevel = drawAtt->TextureLevel; + const GLenum target = texObj->Target; + + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, dstLevel); + GLenum internalFormat = texImage->InternalFormat; + + if (intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, + dstX0, dstY0, + srcX0, srcY0, + srcX1 - srcX0, /* width */ + srcY1 - srcY0)) + mask &= ~GL_COLOR_BUFFER_BIT; + } + } + + return mask; +} + +static void +intel_blit_framebuffer(struct gl_context *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + /* Try faster, glCopyTexSubImage2D approach first which uses the BLT. */ + mask = intel_blit_framebuffer_copy_tex_sub_image(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; + + _mesa_meta_BlitFramebuffer(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); +} /** * Do one-time context initializations related to GL_EXT_framebuffer_object. 
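The heart of intel_blit_framebuffer_copy_tex_sub_image() above is the test that the blit is a plain unscaled, unmirrored copy; only then can glCopyTexSubImage2D stand in for glBlitFramebuffer. A small sketch of that rectangle check, assuming the same signed (x0, x1) span convention (blit_is_unscaled_copy is a hypothetical helper, not driver API):

#include <stdbool.h>

static bool
blit_is_unscaled_copy(int srcX0, int srcY0, int srcX1, int srcY1,
                      int dstX0, int dstY0, int dstX1, int dstY1)
{
   /* Equal signed extents on both axes rule out scaling and mirroring,
    * and requiring x1 >= x0, y1 >= y0 keeps the spans non-flipped.
    */
   return srcX1 - srcX0 == dstX1 - dstX0 &&
          srcY1 - srcY0 == dstY1 - dstY0 &&
          srcX1 >= srcX0 && srcY1 >= srcY0;
}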
@@ -748,7 +742,7 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; + intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; #if FEATURE_OES_EGL_image intel->ctx.Driver.EGLImageTargetRenderbufferStorage = diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 9c4e5c5ee8..a3409274fb 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -157,8 +157,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte) @@ -166,7 +164,7 @@ intel_miptree_create_for_region(struct intel_context *intel, struct intel_mipmap_tree *mt; mt = intel_miptree_create_internal(intel, target, internal_format, - first_level, last_level, + 0, 0, region->width, region->height, 1, region->cpp, compress_byte, I915_TILING_NONE); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 21db2f4d3b..760a8bce60 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -137,8 +137,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte); diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index d5c35775ce..f97256e59b 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -66,12 +66,12 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, GLboolean src_alpha_is_on } if (ctx->Color.BlendEnabled && - (effective_func(ctx->Color.BlendSrcRGB, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.BlendDstRGB, src_alpha_is_one) != GL_ZERO || - ctx->Color.BlendEquationRGB != GL_FUNC_ADD || - effective_func(ctx->Color.BlendSrcA, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.BlendDstA, src_alpha_is_one) != GL_ZERO || - ctx->Color.BlendEquationA != GL_FUNC_ADD)) { + (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || + effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { DBG("fallback due to blend\n"); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index e7356a6da0..43cdd0d2ba 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -31,6 +31,7 @@ #include "main/colormac.h" #include "main/mtypes.h" #include "main/macros.h" +#include "main/pbo.h" #include "main/bufferobj.h" #include "main/state.h" #include "main/texobj.h" @@ -207,7 +208,7 @@ do_blit_bitmap( struct gl_context *ctx, COPY_4V(tmpColor, ctx->Current.RasterColor); - if 
(NEED_SECONDARY_COLOR(ctx)) { + if (_mesa_need_secondary_color(ctx)) { ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); } @@ -285,7 +286,7 @@ do_blit_bitmap( struct gl_context *ctx, out: if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 54da29236d..b2e77c7986 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -80,6 +80,7 @@ do_blit_readpixels(struct gl_context * ctx, drm_intel_bo *dst_buffer; GLboolean all; GLint dst_x, dst_y; + GLuint dirty; DBG("%s\n", __FUNCTION__); @@ -129,7 +130,9 @@ do_blit_readpixels(struct gl_context * ctx, return GL_TRUE; } + dirty = intel->front_buffer_dirty; intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; all = (width * height * src->cpp == dst->Base.Size && x == 0 && dst_offset == 0); @@ -138,8 +141,8 @@ do_blit_readpixels(struct gl_context * ctx, dst_y = 0; dst_buffer = intel_bufferobj_buffer(intel, dst, - all ? INTEL_WRITE_FULL : - INTEL_WRITE_PART); + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); if (ctx->ReadBuffer->Name == 0) y = ctx->ReadBuffer->Height - (y + height); @@ -171,6 +174,10 @@ intelReadPixels(struct gl_context * ctx, DBG("%s\n", __FUNCTION__); + if (do_blit_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; + intel_flush(ctx); /* glReadPixels() wont dirty the front buffer, so reset the dirty @@ -179,10 +186,6 @@ intelReadPixels(struct gl_context * ctx, intel_prepare_render(intel); intel->front_buffer_dirty = dirty; - if (do_blit_readpixels - (ctx, x, y, width, height, format, type, pack, pixels)) - return; - fallback_debug("%s: fallback to swrast\n", __FUNCTION__); /* Update Mesa state before calling down into _swrast_ReadPixels, as diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 955b100b21..5258699d3f 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -37,6 +37,8 @@ #define FLUSH_MAP_CACHE (1 << 0) #define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) +#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2) + /* Stalls command execution waiting for the given events to have occurred. 
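For context on the new MI_FLUSH_DW define above: MI command headers place an opcode in the high bits and a trailing-dword count in the low bits, which is what (0x26 << 23) | 2 spells out. A hedged sketch of that packing, assuming CMD_MI contributes zero bits and treating the field placement as the pattern the define follows rather than a hardware reference:

#include <stdint.h>

static uint32_t demo_mi_header(uint32_t opcode, uint32_t extra_dwords)
{
   /* Mirrors the shape of MI_FLUSH_DW: opcode shifted to bit 23, length low. */
   return (opcode << 23) | extra_dwords;
}
/* demo_mi_header(0x26, 2) reproduces the MI_FLUSH_DW value when CMD_MI == 0. */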
*/ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index e87e29462c..a4da1ce4fa 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -149,11 +149,6 @@ intel_region_alloc_internal(struct intel_screen *screen, { struct intel_region *region; - if (buffer == NULL) { - _DBG("%s <-- NULL\n", __FUNCTION__); - return NULL; - } - region = calloc(sizeof(*region), 1); if (region == NULL) return region; @@ -180,6 +175,7 @@ intel_region_alloc(struct intel_screen *screen, drm_intel_bo *buffer; unsigned long flags = 0; unsigned long aligned_pitch; + struct intel_region *region; if (expect_accelerated_upload) flags |= BO_ALLOC_FOR_RENDER; @@ -187,9 +183,17 @@ intel_region_alloc(struct intel_screen *screen, buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region", width, height, cpp, &tiling, &aligned_pitch, flags); + if (buffer == NULL) + return NULL; - return intel_region_alloc_internal(screen, cpp, width, height, - aligned_pitch / cpp, tiling, buffer); + region = intel_region_alloc_internal(screen, cpp, width, height, + aligned_pitch / cpp, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; } GLboolean @@ -491,7 +495,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, (int)pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e5b6c9f0e4..c8cff0147e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -25,29 +25,18 @@ * **************************************************************************/ +#include <errno.h> #include "main/glheader.h" #include "main/context.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/hash.h" #include "main/fbobject.h" +#include "main/mfeatures.h" #include "utils.h" #include "xmlpool.h" -#include "intel_batchbuffer.h" -#include "intel_buffers.h" -#include "intel_bufmgr.h" -#include "intel_chipset.h" -#include "intel_fbo.h" -#include "intel_screen.h" -#include "intel_tex.h" -#include "intel_regions.h" - -#include "i915_drm.h" - -#define DRI_CONF_TEXTURE_TILING(def) \ - PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -92,6 +81,17 @@ DRI_CONF_END; const GLuint __driNConfigOptions = 11; +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_screen.h" +#include "intel_tex.h" +#include "intel_regions.h" + +#include "i915_drm.h" + #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; #endif /*USE_NEW_INTERFACE */ @@ -105,15 +105,16 @@ static const __DRItexBufferExtension intelTexBufferExtension = { static void intelDRI2Flush(__DRIdrawable *drawable) { - struct intel_context *intel = drawable->driContextPriv->driverPrivate; + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); if (intel->gen < 4) INTEL_FIREVERTICES(intel); intel->need_throttle = GL_TRUE; - if (intel->batch->map != intel->batch->ptr) - 
intel_batchbuffer_flush(intel->batch); + if (intel->batch.used) + intel_batchbuffer_flush(intel); } static const struct __DRI2flushExtensionRec intelFlushExtension = { @@ -312,13 +313,21 @@ intel_get_param(__DRIscreen *psp, int param, int *value) ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); if (ret) { - _mesa_warning(NULL, "drm_i915_getparam: %d", ret); + if (ret != -EINVAL) + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); return GL_FALSE; } return GL_TRUE; } +static GLboolean +intel_get_boolean(__DRIscreen *psp, int param) +{ + int value = 0; + return intel_get_param(psp, param, &value) && value; +} + static void nop_callback(GLuint key, void *data, void *userData) { @@ -492,6 +501,10 @@ intel_init_bufmgr(struct intel_screen *intelScreen) intelScreen->named_regions = _mesa_NewHashTable(); + intelScreen->relaxed_relocations = 0; + intelScreen->relaxed_relocations |= + intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA) << 0; + return GL_TRUE; } @@ -645,6 +658,51 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return (const __DRIconfig **)configs; } +struct intel_buffer { + __DRIbuffer base; + struct intel_region *region; +}; + +static __DRIbuffer * +intelAllocateBuffer(__DRIscreen *screen, + unsigned attachment, unsigned format, + int width, int height) +{ + struct intel_buffer *intelBuffer; + struct intel_screen *intelScreen = screen->private; + + intelBuffer = CALLOC(sizeof *intelBuffer); + if (intelBuffer == NULL) + return NULL; + + intelBuffer->region = intel_region_alloc(intelScreen, I915_TILING_NONE, + format / 8, width, height, GL_TRUE); + + if (intelBuffer->region == NULL) { + FREE(intelBuffer); + return NULL; + } + + intel_region_flink(intelBuffer->region, &intelBuffer->base.name); + + intelBuffer->base.attachment = attachment; + intelBuffer->base.cpp = intelBuffer->region->cpp; + intelBuffer->base.pitch = + intelBuffer->region->pitch * intelBuffer->region->cpp; + + return &intelBuffer->base; +} + +static void +intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer) +{ + struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer; + + intel_region_release(&intelBuffer->region); + free(intelBuffer); +} + + const struct __DriverAPIRec driDriverAPI = { .DestroyScreen = intelDestroyScreen, .CreateContext = intelCreateContext, @@ -654,6 +712,8 @@ const struct __DriverAPIRec driDriverAPI = { .MakeCurrent = intelMakeCurrent, .UnbindContext = intelUnbindContext, .InitScreen2 = intelInitScreen2, + .AllocateBuffer = intelAllocateBuffer, + .ReleaseBuffer = intelReleaseBuffer }; /* This is the table of extensions that the loader will dlsym() for. 
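The relaxed_relocations handling above folds individual kernel-parameter queries into a bitmask through intel_get_boolean(). The same pattern in a self-contained form (query_feature() is a hypothetical stand-in for the GETPARAM wrapper, and the bit assignment is arbitrary here):

#include <stdbool.h>
#include <stdint.h>

static bool query_feature(int param)
{
   (void)param;               /* a real query would ask the kernel */
   return true;
}

static uint32_t demo_query_caps(void)
{
   uint32_t caps = 0;
   caps |= (uint32_t)query_feature(1) << 0;   /* bit 0: first capability */
   return caps;
}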
*/ diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 5863093f00..0f0b5be56d 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -43,6 +43,7 @@ struct intel_screen __DRIscreen *driScrnPriv; GLboolean no_hw; + GLuint relaxed_relocations; GLboolean no_vbo; dri_bufmgr *bufmgr; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 104cadf0f9..1f41518535 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <stdbool.h> #include "main/glheader.h" #include "main/macros.h" #include "main/mtypes.h" @@ -113,6 +114,26 @@ intel_set_span_functions(struct intel_context *intel, #define TAG2(x,y) intel_##x##y##_A8 #include "spantmp2.h" +#define SPANTMP_MESA_FMT MESA_FORMAT_R8 +#define TAG(x) intel_##x##_R8 +#define TAG2(x,y) intel_##x##y##_R8 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG88 +#define TAG(x) intel_##x##_RG88 +#define TAG2(x,y) intel_##x##y##_RG88 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_R16 +#define TAG(x) intel_##x##_R16 +#define TAG2(x,y) intel_##x##y##_R16 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG1616 +#define TAG(x) intel_##x##_RG1616 +#define TAG2(x,y) intel_##x##y##_RG1616 +#include "spantmp2.h" + #define LOCAL_DEPTH_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = rb->Name ? 1 : -1; \ @@ -339,6 +360,32 @@ intel_unmap_vertex_shader_textures(struct gl_context *ctx) } } +typedef void (*span_init_func)(struct gl_renderbuffer *rb); + +static span_init_func intel_span_init_funcs[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_A8] = intel_InitPointers_A8, + [MESA_FORMAT_RGB565] = intel_InitPointers_RGB565, + [MESA_FORMAT_ARGB4444] = intel_InitPointers_ARGB4444, + [MESA_FORMAT_ARGB1555] = intel_InitPointers_ARGB1555, + [MESA_FORMAT_XRGB8888] = intel_InitPointers_xRGB8888, + [MESA_FORMAT_ARGB8888] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_SARGB8] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_Z16] = intel_InitDepthPointers_z16, + [MESA_FORMAT_X8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_S8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_R8] = intel_InitPointers_R8, + [MESA_FORMAT_RG88] = intel_InitPointers_RG88, + [MESA_FORMAT_R16] = intel_InitPointers_R16, + [MESA_FORMAT_RG1616] = intel_InitPointers_RG1616, +}; + +bool +intel_span_supports_format(gl_format format) +{ + return intel_span_init_funcs[format] != NULL; +} + /** * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. 
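The intel_span_init_funcs table above swaps a switch statement for a designated-initializer array indexed by gl_format, and intel_span_supports_format() is just a non-NULL test on that array. The pattern in miniature, with a hypothetical enum and handlers:

#include <stdbool.h>
#include <stdio.h>

enum toy_format { TOY_A8, TOY_RGB565, TOY_ARGB8888, TOY_FORMAT_COUNT };

static void init_a8(void)       { puts("A8 spans"); }
static void init_argb8888(void) { puts("ARGB8888 spans"); }

/* Entries left out of the initializer are implicitly NULL, which doubles
 * as "format unsupported" for the query below.
 */
static void (*const toy_init_funcs[TOY_FORMAT_COUNT])(void) = {
   [TOY_A8]       = init_a8,
   [TOY_ARGB8888] = init_argb8888,
};

static bool toy_supports(enum toy_format f)
{
   return toy_init_funcs[f] != NULL;
}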
@@ -349,37 +396,6 @@ intel_set_span_functions(struct intel_context *intel, { struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - switch (irb->Base.Format) { - case MESA_FORMAT_A8: - intel_InitPointers_A8(rb); - break; - case MESA_FORMAT_RGB565: - intel_InitPointers_RGB565(rb); - break; - case MESA_FORMAT_ARGB4444: - intel_InitPointers_ARGB4444(rb); - break; - case MESA_FORMAT_ARGB1555: - intel_InitPointers_ARGB1555(rb); - break; - case MESA_FORMAT_XRGB8888: - intel_InitPointers_xRGB8888(rb); - break; - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_SARGB8: - intel_InitPointers_ARGB8888(rb); - break; - case MESA_FORMAT_Z16: - intel_InitDepthPointers_z16(rb); - break; - case MESA_FORMAT_X8_Z24: - case MESA_FORMAT_S8_Z24: - intel_InitDepthPointers_z24_s8(rb); - break; - default: - _mesa_problem(NULL, - "Unexpected MesaFormat %d in intelSetSpanFunctions", - irb->Base.Format); - break; - } + assert(intel_span_init_funcs[irb->Base.Format]); + intel_span_init_funcs[irb->Base.Format](rb); } diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index aa8d08e843..5a4c4e8e52 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -28,6 +28,9 @@ #ifndef _INTEL_SPAN_H #define _INTEL_SPAN_H +#include "main/formats.h" +#include <stdbool.h> + extern void intelInitSpanFuncs(struct gl_context * ctx); extern void intelSpanRenderFinish(struct gl_context * ctx); @@ -38,5 +41,6 @@ void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_map_vertex_shader_textures(struct gl_context *ctx); void intel_unmap_vertex_shader_textures(struct gl_context *ctx); +bool intel_span_supports_format(gl_format format); #endif diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c index bbfac74b60..b303ea84dd 100644 --- a/src/mesa/drivers/dri/intel/intel_syncobj.c +++ b/src/mesa/drivers/dri/intel/intel_syncobj.c @@ -72,9 +72,9 @@ intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, struct intel_sync_object *sync = (struct intel_sync_object *)s; assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); - sync->bo = intel->batch->buf; + sync->bo = intel->batch.bo; drm_intel_bo_reference(sync->bo); intel_flush(ctx); diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 646e55bdff..077c611901 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -113,7 +113,6 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, void intelInitTextureFuncs(struct dd_function_table *functions) { - functions->ChooseTextureFormat = intelChooseTextureFormat; functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 7906554e45..52462f39d5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -40,8 +40,7 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions); void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); -gl_format intelChooseTextureFormat(struct gl_context *ctx, GLint internalFormat, - GLenum format, GLenum type); +GLenum intel_mesa_format_to_rb_datatype(gl_format format); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, 
__DRIdrawable *pDraw); @@ -66,4 +65,12 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); +GLboolean intel_copy_texsubimage(struct intel_context *intel, + GLenum target, + struct intel_texture_image *intelImage, + GLenum internalFormat, + GLint dstx, GLint dsty, + GLint x, GLint y, + GLsizei width, GLsizei height); + #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 87b31bf078..62d4169acd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -35,7 +35,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_buffers.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_fbo.h" @@ -50,74 +49,70 @@ * Do the best we can using the blitter. A future project is to use * the texture engine and fragment programs for these copies. */ -static const struct intel_region * -get_teximage_source(struct intel_context *intel, GLenum internalFormat) +static struct intel_renderbuffer * +get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) { - struct intel_renderbuffer *irb; - DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); switch (internalFormat) { case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 2) - return irb->region; - return NULL; case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 4) - return irb->region; - return NULL; - case 4: - case GL_RGBA: - case GL_RGBA8: - irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - /* We're required to set alpha to 1.0 in this case, but we can't - * do that with the blitter, so fall back. We could use the 3D - * engine or do two passes with the blitter, but it doesn't seem - * worth it for this case. 
*/ - if (irb->Base._BaseFormat == GL_RGB) - return NULL; - return irb->region; - case 3: - case GL_RGB: - case GL_RGB8: - return intel_readbuf_region(intel); + return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); default: - return NULL; + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } } -static GLboolean -do_copy_texsubimage(struct intel_context *intel, - GLenum target, - struct intel_texture_image *intelImage, - GLenum internalFormat, - GLint dstx, GLint dsty, - GLint x, GLint y, GLsizei width, GLsizei height) +GLboolean +intel_copy_texsubimage(struct intel_context *intel, + GLenum target, + struct intel_texture_image *intelImage, + GLenum internalFormat, + GLint dstx, GLint dsty, + GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; - const struct intel_region *src = get_teximage_source(intel, internalFormat); + struct intel_renderbuffer *irb; + bool copy_supported = false; + bool copy_supported_with_alpha_override = false; + + intel_prepare_render(intel); - if (!intelImage->mt || !src || !src->buffer) { + irb = get_teximage_readbuffer(intel, internalFormat); + if (!intelImage->mt || !irb || !irb->region) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", - __FUNCTION__, intelImage->mt, src, internalFormat); + __FUNCTION__, intelImage->mt, irb, internalFormat); return GL_FALSE; } - if (intelImage->mt->cpp != src->cpp) { - fallback_debug("%s fail %d vs %d cpp\n", - __FUNCTION__, intelImage->mt->cpp, src->cpp); + copy_supported = intelImage->base.TexFormat == irb->Base.Format; + + /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */ + if (irb->Base.Format == MESA_FORMAT_ARGB8888 && + intelImage->base.TexFormat == MESA_FORMAT_XRGB8888) { + copy_supported = true; + } + + /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */ + if (irb->Base.Format == MESA_FORMAT_XRGB8888 && + intelImage->base.TexFormat == MESA_FORMAT_ARGB8888) { + copy_supported_with_alpha_override = true; + } + + if (!copy_supported && !copy_supported_with_alpha_override) { + if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) + fprintf(stderr, "%s mismatched formats %s, %s\n", + __FUNCTION__, + _mesa_get_format_name(intelImage->base.TexFormat), + _mesa_get_format_name(irb->Base.Format)); return GL_FALSE; } - /* intel_flush(ctx); */ - intel_prepare_render(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, intelImage->mt->region, @@ -140,24 +135,24 @@ do_copy_texsubimage(struct intel_context *intel, if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); - src_pitch = -src->pitch; + src_pitch = -irb->region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ - src_pitch = src->pitch; + src_pitch = irb->region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, - src->buffer, + irb->region->buffer, 0, - src->tiling, + irb->region->tiling, intelImage->mt->region->pitch, dst_bo, 0, intelImage->mt->region->tiling, - src->draw_x + x, src->draw_y + y, + irb->region->draw_x + x, irb->region->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { @@ -165,6 +160,9 @@ do_copy_texsubimage(struct intel_context *intel, } } + if (copy_supported_with_alpha_override) + intel_set_teximage_alpha_to_one(ctx, intelImage); + return GL_TRUE; } @@ -202,9 +200,9 @@ intelCopyTexImage1D(struct gl_context * ctx, GLenum 
target, GLint level, &width, &height)) return; - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, height)) goto fail; return; @@ -250,9 +248,9 @@ intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, &width, &height)) return; - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, height)) goto fail; return; @@ -280,9 +278,9 @@ intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, /* Need to check texture is compatible with source format. */ - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, xoffset, 0, x, y, width, 1)) { + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, xoffset, 0, x, y, width, 1)) { fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width); } @@ -304,11 +302,10 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, /* Need to check texture is compatible with source format. */ - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, - xoffset, yoffset, x, y, width, height)) { - + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, + xoffset, yoffset, x, y, width, height)) { fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, x, y, width, height); diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 9d73a2fb37..87745bc66d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -4,224 +4,35 @@ #include "main/formats.h" /** - * Choose hardware texture format given the user's glTexImage parameters. - * - * It works out that this function is fine for all the supported - * hardware. However, there is still a need to map the formats onto - * hardware descriptors. - * - * Note that the i915 can actually support many more formats than - * these if we take the step of simply swizzling the colors - * immediately after sampling... + * Returns the renderbuffer DataType for a MESA_FORMAT. 
*/ -gl_format -intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat, - GLenum format, GLenum type) +GLenum +intel_mesa_format_to_rb_datatype(gl_format format) { - struct intel_context *intel = intel_context(ctx); - -#if 0 - printf("%s intFmt=0x%x format=0x%x type=0x%x\n", - __FUNCTION__, internalFormat, format, type); -#endif - - switch (internalFormat) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) - return MESA_FORMAT_ARGB4444; - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) - return MESA_FORMAT_ARGB1555; - else - return MESA_FORMAT_ARGB8888; - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - if (type == GL_UNSIGNED_SHORT_5_6_5) - return MESA_FORMAT_RGB565; - else if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return MESA_FORMAT_ARGB8888; - - case GL_RGBA4: - case GL_RGBA2: - return MESA_FORMAT_ARGB4444; - - case GL_RGB5_A1: - return MESA_FORMAT_ARGB1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return MESA_FORMAT_RGB565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return MESA_FORMAT_A8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return MESA_FORMAT_L8; - - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - /* i915 could implement this mode using MT_32BIT_RG1616. However, this - * would require an extra swizzle instruction in the fragment shader to - * convert the { R, G, 1.0, 1.0 } to { R, R, R, G }. - */ -#ifndef I915 - return MESA_FORMAT_AL1616; -#else - /* FALLTHROUGH */ -#endif - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return MESA_FORMAT_AL88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return MESA_FORMAT_I8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) - return MESA_FORMAT_YCBCR; - else - return MESA_FORMAT_YCBCR_REV; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return MESA_FORMAT_RGB_FXT1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return MESA_FORMAT_RGBA_FXT1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return MESA_FORMAT_RGB_DXT1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return MESA_FORMAT_RGBA_DXT1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return MESA_FORMAT_RGBA_DXT3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return MESA_FORMAT_RGBA_DXT5; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: -#if 0 - return MESA_FORMAT_Z16; -#else - /* fall-through. - * 16bpp depth texture can't be paired with a stencil buffer so - * always used combined depth/stencil format. 
- */ -#endif - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - return MESA_FORMAT_S8_Z24; - -#ifndef I915 - case GL_SRGB_EXT: - case GL_SRGB8_EXT: - case GL_SRGB_ALPHA_EXT: - case GL_SRGB8_ALPHA8_EXT: - case GL_COMPRESSED_SRGB_EXT: - case GL_COMPRESSED_SRGB_ALPHA_EXT: - case GL_COMPRESSED_SLUMINANCE_EXT: - case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SL8; - else - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SLA8; - else - return MESA_FORMAT_SARGB8; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return MESA_FORMAT_SRGB_DXT1; - - /* i915 could also do this */ - case GL_DUDV_ATI: - case GL_DU8DV8_ATI: - return MESA_FORMAT_DUDV8; - case GL_RGBA_SNORM: - case GL_RGBA8_SNORM: - return MESA_FORMAT_SIGNED_RGBA8888_REV; - - /* i915 can do a RG16, but it can't do any of the other RED or RG formats. - * In addition, it only implements the broken D3D mode where undefined - * components are read as 1.0. I'm not sure who thought reading - * { R, G, 1.0, 1.0 } from a red-green texture would be useful. - */ - case GL_RED: - case GL_R8: - return MESA_FORMAT_R8; - case GL_R16: - return MESA_FORMAT_R16; - case GL_RG: - case GL_RG8: - return MESA_FORMAT_RG88; - case GL_RG16: - return MESA_FORMAT_RG1616; -#endif - + switch (format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_R8: + case MESA_FORMAT_RG88: + case MESA_FORMAT_A8: + case MESA_FORMAT_AL88: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + return GL_UNSIGNED_BYTE; + case MESA_FORMAT_R16: + case MESA_FORMAT_RG1616: + case MESA_FORMAT_Z16: + return GL_UNSIGNED_SHORT; + case MESA_FORMAT_X8_Z24: + return GL_UNSIGNED_INT; + case MESA_FORMAT_S8_Z24: + return GL_UNSIGNED_INT_24_8_EXT; default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); - return MESA_FORMAT_NONE; + _mesa_problem(NULL, "unexpected MESA_FORMAT for renderbuffer"); + return GL_UNSIGNED_BYTE; } - - return MESA_FORMAT_NONE; /* never get here */ } int intel_compressed_num_bytes(GLuint mesaFormat) diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 41cdbfd2cb..906f8a6271 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -1,11 +1,13 @@ #include "main/glheader.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/enums.h" #include "main/bufferobj.h" #include "main/context.h" #include "main/formats.h" +#include "main/pbo.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/texgetimage.h" @@ -55,11 +57,11 @@ logbase2(int n) * 0)..(1x1). Consider pruning this tree at a validation if the * saving is worth it. 
*/ -static void -guess_and_alloc_mipmap_tree(struct intel_context *intel, - struct intel_texture_object *intelObj, - struct intel_texture_image *intelImage, - GLboolean expect_accelerated_upload) +static struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + GLboolean expect_accelerated_upload) { GLuint firstLevel; GLuint lastLevel; @@ -72,70 +74,71 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, DBG("%s\n", __FUNCTION__); if (intelImage->base.Border) - return; + return NULL; if (intelImage->level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || (intelObj->base.Target == GL_TEXTURE_3D && - intelImage->base.Depth == 1))) - return; - - /* If this image disrespects BaseLevel, allocate from level zero. - * Usually BaseLevel == 0, so it's unlikely to happen. - */ - if (intelImage->level < intelObj->base.BaseLevel) - firstLevel = 0; - else - firstLevel = intelObj->base.BaseLevel; - - - /* Figure out image dimensions at start level. - */ - for (i = intelImage->level; i > firstLevel; i--) { - width <<= 1; - if (height != 1) - height <<= 1; - if (depth != 1) - depth <<= 1; - } + intelImage->base.Depth == 1))) { + /* For this combination, we're at some lower mipmap level and + * some important dimension is 1. We can't extrapolate up to a + * likely base level width/height/depth for a full mipmap stack + * from this info, so just allocate this one level. + */ + firstLevel = intelImage->level; + lastLevel = intelImage->level; + } else { + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if (intelImage->level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + /* Figure out image dimensions at start level. */ + for (i = intelImage->level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } - /* Guess a reasonable value for lastLevel. This is probably going - * to be wrong fairly often and might mean that we have to look at - * resizable buffers, or require that buffers implement lazy - * pagetable arrangements. - */ - if ((intelObj->base.MinFilter == GL_NEAREST || - intelObj->base.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && - (intel->gen < 4 || firstLevel == 0)) { - lastLevel = firstLevel; - } - else { - lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. 
+ */ + if ((intelObj->base.MinFilter == GL_NEAREST || + intelObj->base.MinFilter == GL_LINEAR) && + intelImage->level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { + lastLevel = firstLevel; + } else { + lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + } } - assert(!intelObj->mt); if (_mesa_is_format_compressed(intelImage->base.TexFormat)) comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat); texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - intelObj->mt = intel_miptree_create(intel, - intelObj->base.Target, - intelImage->base._BaseFormat, - intelImage->base.InternalFormat, - firstLevel, - lastLevel, - width, - height, - depth, - texelBytes, - comp_byte, - expect_accelerated_upload); - - DBG("%s - success\n", __FUNCTION__); + return intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base._BaseFormat, + intelImage->base.InternalFormat, + firstLevel, + lastLevel, + width, + height, + depth, + texelBytes, + comp_byte, + expect_accelerated_upload); } @@ -229,15 +232,18 @@ try_pbo_upload(struct intel_context *intel, dst_stride = intelImage->mt->region->pitch; - if (drm_intel_bo_references(intel->batch->buf, dst_buffer)) + if (drm_intel_bo_references(intel->batch.bo, dst_buffer)) intel_flush(&intel->ctx); { - drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); + GLuint offset; + drm_intel_bo *src_buffer = + intel_bufferobj_source(intel, pbo, 64, &offset); if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, - src_stride, src_buffer, src_offset, GL_FALSE, + src_stride, src_buffer, + src_offset + offset, GL_FALSE, dst_stride, dst_buffer, 0, intelImage->mt->region->tiling, 0, 0, dst_x, dst_y, width, height, @@ -343,41 +349,29 @@ intelTexImage(struct gl_context * ctx, texImage->Data = NULL; } - if (!intelObj->mt) { - guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL); - if (!intelObj->mt) { - DBG("guess_and_alloc_mipmap_tree: failed\n"); - } - } - assert(!intelImage->mt); if (intelObj->mt && intel_miptree_match_image(intelObj->mt, &intelImage->base)) { - + /* Use an existing miptree when possible */ intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); } else if (intelImage->base.Border == 0) { - int comp_byte = 0; - GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat); - if (_mesa_is_format_compressed(intelImage->base.TexFormat)) { - comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat); - } - /* Didn't fit in the object miptree, but it's suitable for inclusion in * a miptree, so create one just for our level and store it in the image. * It'll get moved into the object miptree at validate time. */ - intelImage->mt = intel_miptree_create(intel, target, - baseFormat, - internalFormat, - level, level, - width, height, depth, - texelBytes, - comp_byte, pixels == NULL); - + intelImage->mt = intel_miptree_create_for_teximage(intel, intelObj, + intelImage, + pixels == NULL); + + /* Even if the object currently has a mipmap tree associated + * with it, this one is a more likely candidate to represent the + * whole object since our level didn't fit what was there + * before, and any lower levels would fit into our miptree. 
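The lastLevel guess in intel_miptree_create_for_teximage() comes down to "one mip level per halving of the largest base dimension". A self-contained sketch of that arithmetic (demo_* names are illustrative only):

static int demo_logbase2(int n)
{
   int log2 = 0;
   while (n >>= 1)
      log2++;
   return log2;
}

static int demo_last_level(int first_level, int width, int height, int depth)
{
   int max_dim = width;
   if (height > max_dim) max_dim = height;
   if (depth > max_dim)  max_dim = depth;
   return first_level + demo_logbase2(max_dim);
}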
+ */ + if (intelImage->mt) + intel_miptree_reference(&intelObj->mt, intelImage->mt); } /* PBO fastpaths: @@ -439,7 +433,7 @@ intelTexImage(struct gl_context * ctx, if (intelImage->mt) { if (pixels != NULL) { /* Flush any queued rendering with the texture before mapping. */ - if (drm_intel_bo_references(intel->batch->buf, + if (drm_intel_bo_references(intel->batch.bo, intelImage->mt->region->buffer)) { intel_flush(ctx); } @@ -711,8 +705,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, } mt = intel_miptree_create_for_region(intel, target, - internalFormat, - 0, 0, rb->region, 1, 0); + internalFormat, rb->region, 1, 0); if (mt == NULL) return; @@ -777,7 +770,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, mt = intel_miptree_create_for_region(intel, target, image->internal_format, - 0, 0, image->region, 1, 0); + image->region, 1, 0); if (mt == NULL) return; diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index 5f60e0ea4f..e93ef4a472 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -32,11 +32,11 @@ struct intel_texture_object { struct gl_texture_object base; /* The "parent" object */ - /* The mipmap tree must include at least these levels once - * validated: + /* This is a mirror of base._MaxLevel, updated at validate time, + * except that we don't bother with the non-base levels for + * non-mipmapped textures. */ - GLuint firstLevel; - GLuint lastLevel; + unsigned int _MaxLevel; /* Offset for firstLevel image: */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index c9b992a21b..d0f8294113 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -27,6 +27,7 @@ **************************************************************************/ #include "main/mtypes.h" +#include "main/pbo.h" #include "main/texobj.h" #include "main/texstore.h" #include "main/texcompress.h" @@ -89,19 +90,19 @@ intelTexSubimage(struct gl_context * ctx, intel->gen < 6 && target == GL_TEXTURE_2D && drm_intel_bo_busy(dst_bo)) { - unsigned long pitch; - uint32_t tiling_mode = I915_TILING_NONE; - temp_bo = drm_intel_bo_alloc_tiled(intel->bufmgr, - "subimage blit bo", - width, height, - intelImage->mt->cpp, - &tiling_mode, - &pitch, - 0); - drm_intel_gem_bo_map_gtt(temp_bo); + dstRowStride = width * intelImage->mt->cpp; + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subimage blit bo", + dstRowStride * height, 0); + if (!temp_bo) + return; + + if (drm_intel_gem_bo_map_gtt(temp_bo)) { + drm_intel_bo_unreference(temp_bo); + return; + } + texImage->Data = temp_bo->virtual; texImage->ImageOffsets[0] = 0; - dstRowStride = pitch; intel_miptree_get_image_offset(intelImage->mt, level, intelImage->face, 0, diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index ed5c5d896b..a11b07ed09 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -8,72 +8,21 @@ #define FILE_DEBUG_FLAG DEBUG_TEXTURE /** - * Compute which mipmap levels that really need to be sent to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. 
+ * When validating, we only care about the texture images that could + * be seen, so for non-mipmapped modes we want to ignore everything + * but BaseLevel. */ static void -intel_calculate_first_last_level(struct intel_context *intel, - struct intel_texture_object *intelObj) +intel_update_max_level(struct intel_context *intel, + struct intel_texture_object *intelObj) { struct gl_texture_object *tObj = &intelObj->base; - const struct gl_texture_image *const baseImage = - tObj->Image[0][tObj->BaseLevel]; - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - - /* Yes, this looks overly complicated, but it's all needed. - */ - switch (tObj->Target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP: - if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { - /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. - */ - firstLevel = lastLevel = tObj->BaseLevel; - } - else { - if (intel->gen == 2) { - firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } else { - /* Min/max LOD are taken into account in sampler state. We don't - * want to re-layout textures just because clamping has been applied - * since it means a bunch of blitting around and probably no memory - * savings (since we have to keep the other levels around anyway). - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - /* need at least one level */ - lastLevel = MAX2(firstLevel, lastLevel); - } - } - break; - case GL_TEXTURE_RECTANGLE_NV: - case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; - break; - default: - return; + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + intelObj->_MaxLevel = tObj->BaseLevel; + } else { + intelObj->_MaxLevel = tObj->_MaxLevel; } - - /* save these values */ - intelObj->firstLevel = firstLevel; - intelObj->lastLevel = lastLevel; } /** @@ -135,8 +84,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* What levels must the tree include at a minimum? */ - intel_calculate_first_last_level(intel, intelObj); - firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]); + intel_update_max_level(intel, intelObj); + firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); /* Fallback case: */ @@ -147,23 +96,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) return GL_FALSE; } - - /* If both firstImage and intelObj have a tree which can contain - * all active images, favour firstImage. Note that because of the - * completeness requirement, we know that the image dimensions - * will match. 
- */ - if (firstImage->mt && - firstImage->mt != intelObj->mt && - firstImage->mt->first_level <= intelObj->firstLevel && - firstImage->mt->last_level >= intelObj->lastLevel) { - - if (intelObj->mt) - intel_miptree_release(intel, &intelObj->mt); - - intel_miptree_reference(&intelObj->mt, firstImage->mt); - } - if (_mesa_is_format_compressed(firstImage->base.TexFormat)) { comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat); cpp = comp_byte; @@ -173,18 +105,17 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. - * - * XXX: For some layouts (eg i945?), the test might have to be - * first_level == firstLevel, as the tree isn't valid except at the - * original start level. Hope to get around this by - * programming minLod, maxLod, baseLevel into the hardware and - * leaving the tree alone. + * + * For pre-gen4, we have to match first_level == tObj->BaseLevel, + * because we don't have the control that gen4 does to make min/mag + * determination happen at a nonzero (hardware) baselevel. Because + * of that, we just always relayout on baselevel change. */ if (intelObj->mt && (intelObj->mt->target != intelObj->base.Target || intelObj->mt->internal_format != firstImage->base.InternalFormat || - intelObj->mt->first_level != intelObj->firstLevel || - intelObj->mt->last_level != intelObj->lastLevel || + intelObj->mt->first_level != tObj->BaseLevel || + intelObj->mt->last_level < intelObj->_MaxLevel || intelObj->mt->width0 != firstImage->base.Width || intelObj->mt->height0 != firstImage->base.Height || intelObj->mt->depth0 != firstImage->base.Depth || @@ -201,25 +132,29 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intelObj->base.Target, firstImage->base._BaseFormat, firstImage->base.InternalFormat, - intelObj->firstLevel, - intelObj->lastLevel, + tObj->BaseLevel, + intelObj->_MaxLevel, firstImage->base.Width, firstImage->base.Height, firstImage->base.Depth, cpp, comp_byte, GL_TRUE); + if (!intelObj->mt) + return GL_FALSE; } /* Pull in any images not in the object's tree: */ nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; for (face = 0; face < nr_faces; face++) { - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { + for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][i]); - - /* Need to import images in main memory or held in other trees. + /* skip too small size mipmap */ + if (intelImage == NULL) + break; + /* Need to import images in main memory or held in other trees. * If it's a render target, then its data isn't needed to be in * the object tree (otherwise we'd be FBO incomplete), and we need * to keep track of the image's MT as needing to be pulled in still, @@ -289,7 +224,7 @@ intel_tex_map_images(struct intel_context *intel, DBG("%s\n", __FUNCTION__); - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_map_level_images(intel, intelObj, i); } @@ -299,6 +234,6 @@ intel_tex_unmap_images(struct intel_context *intel, { int i; - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_unmap_level_images(intel, intelObj, i); } |
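
Note on the level-range rework above: after this patch the driver no longer caches firstLevel/lastLevel on intel_texture_object; validation recomputes the highest level it needs (intelObj->_MaxLevel) directly from GL state, and the miptree is relaid out whenever first_level stops matching BaseLevel or last_level falls short of that maximum. The sketch below restates that selection rule as a small self-contained C fragment for readers skimming the hunks; it is illustrative only -- the helper names and the plain-int stand-ins for the GL object fields are invented for the example and are not part of the Mesa API.

    #include <stdbool.h>

    /* Mirrors the rule intel_update_max_level() applies in the hunk above:
     * non-mipmapped minification (GL_NEAREST / GL_LINEAR) only ever samples
     * BaseLevel, so nothing past it needs to be resident; otherwise core
     * Mesa's derived _MaxLevel bounds the range.
     */
    static int
    max_resident_level(int base_level, int derived_max_level,
                       bool mipmapped_min_filter)
    {
       if (!mipmapped_min_filter)
          return base_level;
       return derived_max_level;
    }

    /* Mirrors the reuse test in intel_finalize_mipmap_tree(): an existing
     * tree is kept only if it starts exactly at BaseLevel and reaches at
     * least the maximum resident level.  The real code additionally checks
     * target, internal format, dimensions and cpp before reusing the tree.
     */
    static bool
    tree_covers_levels(int tree_first, int tree_last,
                       int base_level, int max_level)
    {
       return tree_first == base_level && tree_last >= max_level;
    }

For example, a GL_LINEAR-filtered texture with BaseLevel 2 ends up with level 2 as both its first and last resident level, whereas a GL_LINEAR_MIPMAP_LINEAR one keeps BaseLevel.._MaxLevel -- exactly the range walked by the "Pull in any images not in the object's tree" loop and by the map/unmap helpers after the change.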