35 files changed, 1610 insertions, 1543 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 82a92a9f45..6aa36d10b1 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -195,7 +195,11 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
 {
    struct intel_context *intel = batch->intel;
    GLuint used = batch->ptr - batch->map;
-   GLboolean was_locked = intel->locked;
+
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+   }
 
    if (intel->first_post_swapbuffers_batch == NULL) {
       intel->first_post_swapbuffers_batch = intel->batch->buf;
@@ -248,13 +252,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    /* TODO: Just pass the relocation list and dma buffer up to the
     * kernel.
     */
-   if (!was_locked)
-      LOCK_HARDWARE(intel);
-
+   LOCK_HARDWARE(intel);
    do_flush_locked(batch, used, GL_FALSE);
-
-   if (!was_locked)
-      UNLOCK_HARDWARE(intel);
+   UNLOCK_HARDWARE(intel);
 
    if (INTEL_DEBUG & DEBUG_SYNC) {
       fprintf(stderr, "waiting for idle\n");
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 4919828131..0c5be4c798 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -108,6 +108,8 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
       }
 
+      assert(src->tiling != I915_TILING_Y);
+      assert(dst->tiling != I915_TILING_Y);
 #ifndef I915
       if (src->tiling != I915_TILING_NONE) {
 	 CMD |= XY_SRC_TILED;
@@ -175,66 +177,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
    UNLOCK_HARDWARE(intel);
 }
 
-
-
-
-void
-intelEmitFillBlit(struct intel_context *intel,
-		  GLuint cpp,
-		  GLshort dst_pitch,
-		  dri_bo *dst_buffer,
-		  GLuint dst_offset,
-		  uint32_t dst_tiling,
-		  GLshort x, GLshort y,
-		  GLshort w, GLshort h,
-		  GLuint color)
-{
-   GLuint BR13, CMD;
-   BATCH_LOCALS;
-
-   dst_pitch *= cpp;
-
-   switch (cpp) {
-   case 1:
-      BR13 = (0xF0 << 16);
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 2:
-      BR13 = (0xF0 << 16) | BR13_565;
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | BR13_8888;
-      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      break;
-   default:
-      return;
-   }
-#ifndef I915
-   if (dst_tiling != I915_TILING_NONE) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-#endif
-
-   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
-       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
-
-   assert(w > 0);
-   assert(h > 0);
-
-   BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
-   OUT_BATCH(CMD);
-   OUT_BATCH(BR13 | dst_pitch);
-   OUT_BATCH((y << 16) | x);
-   OUT_BATCH(((y + h) << 16) | (x + w));
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
-   OUT_BATCH(color);
-   ADVANCE_BATCH();
-}
-
 static GLuint translate_raster_op(GLenum logicop)
 {
    switch(logicop) {
@@ -261,7 +203,7 @@ static GLuint translate_raster_op(GLenum logicop)
 
 /* Copy BitBlt
  */
-void
+GLboolean
 intelEmitCopyBlit(struct intel_context *intel,
 		  GLuint cpp,
 		  GLshort src_pitch,
@@ -283,6 +225,19 @@ intelEmitCopyBlit(struct intel_context *intel,
    dri_bo *aper_array[3];
    BATCH_LOCALS;
 
+   if (dst_tiling != I915_TILING_NONE) {
+      if (dst_offset & 4095)
+	 return GL_FALSE;
+      if (dst_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+   if (src_tiling != I915_TILING_NONE) {
+      if (src_offset & 4095)
+	 return GL_FALSE;
+      if (src_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+
    /* do space/cliprects check before going any further */
    do {
        aper_array[0] = intel->batch->buf;
@@ -297,12 +252,7 @@ intelEmitCopyBlit(struct intel_context *intel,
    } while (pass < 2);
 
    if (pass >= 2) {
-       GLboolean locked = GL_FALSE;       
-       if (!intel->locked) {
-           LOCK_HARDWARE(intel);
-           locked = GL_TRUE;
-       }
-
+       LOCK_HARDWARE(intel);
        dri_bo_map(dst_buffer, GL_TRUE);
        dri_bo_map(src_buffer, GL_FALSE);
        _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
@@ -316,11 +266,9 @@ intelEmitCopyBlit(struct intel_context *intel,
        
        dri_bo_unmap(src_buffer);
        dri_bo_unmap(dst_buffer);
-       
-       if (locked)
-           UNLOCK_HARDWARE(intel);
+       UNLOCK_HARDWARE(intel);
 
-       return;
+       return GL_TRUE;
    }
 
    intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
@@ -347,7 +295,7 @@ intelEmitCopyBlit(struct intel_context *intel,
       CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
       break;
    default:
-      return;
+      return GL_FALSE;
    }
 
 #ifndef I915
@@ -362,7 +310,7 @@ intelEmitCopyBlit(struct intel_context *intel,
 #endif
 
    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
-      return;
+      return GL_TRUE;
    }
 
    assert(dst_x < dst_x2);
@@ -384,6 +332,8 @@ intelEmitCopyBlit(struct intel_context *intel,
    ADVANCE_BATCH();
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   return GL_TRUE;
 }
 
 
@@ -527,6 +477,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
                   BR13 |= BR13_565;
                }
 
+	       assert(irb->region->tiling != I915_TILING_Y);
+
 #ifndef I915
 	       if (irb->region->tiling != I915_TILING_NONE) {
 		  CMD |= XY_DST_TILED;
@@ -596,7 +548,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
    UNLOCK_HARDWARE(intel);
 }
 
-void
+GLboolean
 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLuint cpp,
 				  GLubyte *src_bits, GLuint src_size,
@@ -612,11 +564,19 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    int dwords = ALIGN(src_size, 8) / 4;
    uint32_t opcode, br13, blit_cmd;
 
+   if (dst_tiling != I915_TILING_NONE) {
+      if (dst_offset & 4095)
+	 return GL_FALSE;
+      if (dst_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+
    assert( logic_op - GL_CLEAR >= 0 );
    assert( logic_op - GL_CLEAR < 0x10 );
+   assert(dst_pitch > 0);
 
    if (w < 0 || h < 0)
-      return;
+      return GL_TRUE;
 
    dst_pitch *= cpp;
 
@@ -673,4 +633,46 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 			   REFERENCES_CLIPRECTS );
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   return GL_TRUE;
+}
+
+/* We don't have a memmove-type blit like some other hardware, so we do a
+ * rectangular blit covering as many full rows as possible, then emit a
+ * 1-scanline blit for the remaining bytes, if any.  A size of 0 emits nothing.
+ */
+void
+intel_emit_linear_blit(struct intel_context *intel,
+		       drm_intel_bo *dst_bo,
+		       unsigned int dst_offset,
+		       drm_intel_bo *src_bo,
+		       unsigned int src_offset,
+		       unsigned int size)
+{
+   GLuint pitch, height;
+
+   if (size == 0)
+      return;
+   pitch = MIN2(size, (1 << 15) - 1);   /* the pitch is a signed value */
+   height = size / pitch;
+   intelEmitCopyBlit(intel, 1,
+		     pitch, src_bo, src_offset, I915_TILING_NONE,
+		     pitch, dst_bo, dst_offset, I915_TILING_NONE,
+		     0, 0, /* src x/y */
+		     0, 0, /* dst x/y */
+		     pitch, height, /* w, h */
+		     GL_COPY);
+   src_offset += pitch * height;
+   dst_offset += pitch * height;
+   size -= pitch * height;
+   assert (size < (1 << 15));
+   if (size != 0) {
+      intelEmitCopyBlit(intel, 1,
+			size, src_bo, src_offset, I915_TILING_NONE,
+			size, dst_bo, dst_offset, I915_TILING_NONE,
+			0, 0, /* src x/y */
+			0, 0, /* dst x/y */
+			size, 1, /* w, h */
+			GL_COPY);
+   }
 }
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
index 52065b13ed..240cb7cd1b 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -35,7 +35,8 @@ extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv,
 
 extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask);
 
-extern void intelEmitCopyBlit(struct intel_context *intel,
+GLboolean
+intelEmitCopyBlit(struct intel_context *intel,
                               GLuint cpp,
                               GLshort src_pitch,
                               dri_bo *src_buffer,
@@ -50,16 +51,7 @@ extern void intelEmitCopyBlit(struct intel_context *intel,
                               GLshort w, GLshort h,
 			      GLenum logicop );
 
-extern void intelEmitFillBlit(struct intel_context *intel,
-                              GLuint cpp,
-                              GLshort dst_pitch,
-                              dri_bo *dst_buffer,
-                              GLuint dst_offset,
-			      uint32_t dst_tiling,
-                              GLshort x, GLshort y,
-                              GLshort w, GLshort h, GLuint color);
-
-void
+GLboolean
 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLuint cpp,
 				  GLubyte *src_bits, GLuint src_size,
@@ -71,5 +63,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLshort x, GLshort y,
 				  GLshort w, GLshort h,
 				  GLenum logic_op);
+void intel_emit_linear_blit(struct intel_context *intel,
+			    drm_intel_bo *dst_bo,
+			    unsigned int dst_offset,
+			    drm_intel_bo *src_bo,
+			    unsigned int src_offset,
+			    unsigned int size);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 2e6b77824d..c55c5c426e 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -28,13 +28,18 @@
 
 #include "main/imports.h"
 #include "main/mtypes.h"
+#include "main/macros.h"
 #include "main/bufferobj.h"
 
 #include "intel_context.h"
+#include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
 
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+                      GLenum target, struct gl_buffer_object *obj);
 
 /** Allocates a new dri_bo to store the data for the buffer object. */
 static void
@@ -100,7 +105,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
    assert(intel_obj);
-   assert(!obj->Pointer); /* Mesa should have unmapped it */
+
+   /* Buffer objects are automatically unmapped when deleting according
+    * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
+    * (though it does if you call glDeleteBuffers)
+    */
+   if (obj->Pointer)
+      intel_bufferobj_unmap(ctx, 0, obj);
 
    _mesa_free(intel_obj->sys_buffer);
    if (intel_obj->region) {
@@ -119,9 +130,10 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
  * Allocate space for and store data in a buffer object.  Any data that was
  * previously stored in the buffer object is lost.  If data is NULL,
  * memory will be allocated, but no copy will occur.
- * Called via glBufferDataARB().
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
  */
-static void
+static GLboolean
 intel_bufferobj_data(GLcontext * ctx,
                      GLenum target,
                      GLsizeiptrARB size,
@@ -156,15 +168,19 @@ intel_bufferobj_data(GLcontext * ctx,
 	 if (intel_obj->sys_buffer != NULL) {
 	    if (data != NULL)
 	       memcpy(intel_obj->sys_buffer, data, size);
-	    return;
+	    return GL_TRUE;
 	 }
       }
 #endif
       intel_bufferobj_alloc_buffer(intel, intel_obj);
+      if (!intel_obj->buffer)
+         return GL_FALSE;
 
       if (data != NULL)
 	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
    }
+
+   return GL_TRUE;
 }
 
 
@@ -234,6 +250,11 @@ intel_bufferobj_map(GLcontext * ctx,
       return obj->Pointer;
    }
 
+   /* Flush any existing batchbuffer that might have written to this
+    * buffer.
+    */
+   intelFlush(ctx);
+
    if (intel_obj->region)
       intel_bufferobj_cow(intel, intel_obj);
 
@@ -251,32 +272,205 @@ intel_bufferobj_map(GLcontext * ctx,
    }
 
    obj->Pointer = intel_obj->buffer->virtual;
+   obj->Length = obj->Size;
+   obj->Offset = 0;
+
+   return obj->Pointer;
+}
+
+/**
+ * Called via glMapBufferRange().
+ *
+ * The goal of this extension is to allow apps to accumulate their rendering
+ * at the same time as they accumulate their buffer object.  Without it,
+ * you'd end up blocking on execution of rendering every time you mapped
+ * the buffer to put new data in.
+ *
+ * We support it in 3 ways: If unsynchronized, then don't bother
+ * flushing the batchbuffer before mapping the buffer, which can save blocking
+ * in many cases.  If we would still block, and they allow the whole buffer
+ * to be invalidated, then just allocate a new buffer to replace the old one.
+ * If not, and we'd block, and they allow the subrange of the buffer to be
+ * invalidated, then we can make a new little BO, let them write into that,
+ * and blit it into the real BO at unmap time.
+ */
+static void *
+intel_bufferobj_map_range(GLcontext * ctx,
+			  GLenum target, GLintptr offset, GLsizeiptr length,
+			  GLbitfield access, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
+    * internally uses our functions directly.
+    */
+   obj->Offset = offset;
+   obj->Length = length;
+   obj->AccessFlags = access;
+
+   if (intel_obj->sys_buffer) {
+      obj->Pointer = intel_obj->sys_buffer + offset;
+      return obj->Pointer;
+   }
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   /* If the mapping is synchronized with other GL operations, flush the
+    * batchbuffer so that GEM knows about the buffer access for later syncing.
+    */
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT))
+      intelFlush(ctx);
+
+   /* If the user doesn't care about existing buffer contents and mapping
+    * would cause us to block, then throw out the old buffer.
+    */
+   if (intel_obj->buffer != NULL &&
+       !(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      drm_intel_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
+				       intel_obj->Base.Size, 64);
+   }
+
+   /* No BO to map; this also catches a failed replacement allocation. */
+   if (intel_obj->buffer == NULL) {
+      obj->Pointer = NULL;
+      return NULL;
+   }
+
+   /* If the user is mapping a range of an active buffer object but doesn't
+    * require the current contents of that range, hand out temporary
+    * storage and copy it into the real BO at unmap or FlushRange time.
+    */
+   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
+	 intel_obj->range_map_buffer = _mesa_malloc(length);
+	 obj->Pointer = intel_obj->range_map_buffer;
+      } else {
+	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
+						      "range map",
+						      length, 64);
+	 if (!(access & GL_MAP_READ_BIT) &&
+	     intel->intelScreen->kernel_exec_fencing) {
+	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
+	    intel_obj->mapped_gtt = GL_TRUE;
+	 } else {
+	    drm_intel_bo_map(intel_obj->range_map_bo,
+			     (access & GL_MAP_WRITE_BIT) != 0);
+	    intel_obj->mapped_gtt = GL_FALSE;
+	 }
+	 obj->Pointer = intel_obj->range_map_bo->virtual;
+      }
+      return obj->Pointer;
+   }
+
+   if (!(access & GL_MAP_READ_BIT) &&
+       intel->intelScreen->kernel_exec_fencing) {
+      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+      intel_obj->mapped_gtt = GL_TRUE;
+   } else {
+      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
+      intel_obj->mapped_gtt = GL_FALSE;
+   }
+
+   obj->Pointer = intel_obj->buffer->virtual + offset;
+   return obj->Pointer;
+}
 
+/* Ideally we'd use a BO to avoid taking up cache space for the temporary
+ * data, but FlushMappedBufferRange may be followed by further writes to
+ * the pointer, so we would have to re-map after emitting our blit, which
+ * would defeat the point.
+ */
+static void
+intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target,
+				   GLintptr offset, GLsizeiptr length,
+				   struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   drm_intel_bo *temp_bo;
+
+   /* Only the malloc'ed range map needs work, and only for a non-empty
+    * range.  'offset' is relative to the start of the mapped range.
+    */
+   if (intel_obj->range_map_buffer == NULL || length == 0)
+      return;
+
+   temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
+   drm_intel_bo_subdata(temp_bo, 0, length,
+			(char *) intel_obj->range_map_buffer + offset);
+
+   intel_emit_linear_blit(intel,
+			  intel_obj->buffer, obj->Offset + offset,
+			  temp_bo, 0,
+			  length);
+
+   drm_intel_bo_unreference(temp_bo);
+}
+
 
 /**
- * Called via glMapBufferARB().
+ * Called via glUnmapBuffer().
  */
 static GLboolean
 intel_bufferobj_unmap(GLcontext * ctx,
                       GLenum target, struct gl_buffer_object *obj)
 {
+   struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
    assert(intel_obj);
+   assert(obj->Pointer);
    if (intel_obj->sys_buffer != NULL) {
-      assert(obj->Pointer);
-      obj->Pointer = NULL;
+      /* always keep the mapping around. */
+   } else if (intel_obj->range_map_buffer != NULL) {
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+      free(intel_obj->range_map_buffer);
+      intel_obj->range_map_buffer = NULL;
+   } else if (intel_obj->range_map_bo != NULL) {
+      if (intel_obj->mapped_gtt) {
+	 drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
+      } else {
+	 drm_intel_bo_unmap(intel_obj->range_map_bo);
+      }
+
+      intel_emit_linear_blit(intel,
+			     intel_obj->buffer, obj->Offset,
+			     intel_obj->range_map_bo, 0,
+			     obj->Length);
+
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+
+      drm_intel_bo_unreference(intel_obj->range_map_bo);
+      intel_obj->range_map_bo = NULL;
    } else if (intel_obj->buffer != NULL) {
-      assert(obj->Pointer);
       if (intel_obj->mapped_gtt) {
 	 drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
       } else {
 	 drm_intel_bo_unmap(intel_obj->buffer);
       }
-      obj->Pointer = NULL;
    }
+   obj->Pointer = NULL;
+   obj->Offset = 0;
+   obj->Length = 0;
+
    return GL_TRUE;
 }
 
@@ -294,30 +488,94 @@ intel_bufferobj_buffer(struct intel_context *intel,
    }
 
    if (intel_obj->buffer == NULL) {
+      void *sys_buffer = intel_obj->sys_buffer;
+
+      /* only one of buffer and sys_buffer could be non-NULL */
       intel_bufferobj_alloc_buffer(intel, intel_obj);
+      intel_obj->sys_buffer = NULL;
+
       intel_bufferobj_subdata(&intel->ctx,
 			      GL_ARRAY_BUFFER_ARB,
 			      0,
 			      intel_obj->Base.Size,
-			      intel_obj->sys_buffer,
+			      sys_buffer,
 			      &intel_obj->Base);
-      _mesa_free(intel_obj->sys_buffer);
+      _mesa_free(sys_buffer);
       intel_obj->sys_buffer = NULL;
    }
 
    return intel_obj->buffer;
 }
 
+/**
+ * Installed as the CopyBufferSubData driver hook.  Uses memcpy when either
+ * buffer is in system memory, and the blitter otherwise.
+ */
+static void
+intel_bufferobj_copy_subdata(GLcontext *ctx,
+			     struct gl_buffer_object *src,
+			     struct gl_buffer_object *dst,
+			     GLintptr read_offset, GLintptr write_offset,
+			     GLsizeiptr size)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_src = intel_buffer_object(src);
+   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
+   drm_intel_bo *src_bo, *dst_bo;
+
+   if (size == 0)
+      return;
+
+   /* If we're in system memory, just map and memcpy. */
+   if (intel_src->sys_buffer || intel_dst->sys_buffer) {
+      /* src and dst may be the same buffer, but the ranges may not overlap. */
+      if (src == dst) {
+	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+					 GL_READ_WRITE, dst);
+	 memcpy(ptr + write_offset, ptr + read_offset, size);
+	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+      } else {
+	 const char *src_ptr;
+	 char *dst_ptr;
+
+	 src_ptr =  intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
+					GL_READ_ONLY, src);
+	 dst_ptr =  intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+					GL_WRITE_ONLY, dst);
+
+	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
+	 intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
+	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+      }
+      /* The copy is complete; don't fall through and blit it again. */
+      return;
+   }
+
+   /* Otherwise, we have real BOs, so blit them. */
+   dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
+   src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
+
+   intel_emit_linear_blit(intel,
+			  dst_bo, write_offset,
+			  src_bo, read_offset, size);
+
+   /* The blit wrote to buffers that will (likely) be used by rendering in
+    * other cache domains in this batch, so emit a flush.
+    */
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
 void
-intel_bufferobj_init(struct intel_context *intel)
+intelInitBufferObjectFuncs(struct dd_function_table *functions)
 {
-   GLcontext *ctx = &intel->ctx;
-
-   ctx->Driver.NewBufferObject = intel_bufferobj_alloc;
-   ctx->Driver.DeleteBuffer = intel_bufferobj_free;
-   ctx->Driver.BufferData = intel_bufferobj_data;
-   ctx->Driver.BufferSubData = intel_bufferobj_subdata;
-   ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata;
-   ctx->Driver.MapBuffer = intel_bufferobj_map;
-   ctx->Driver.UnmapBuffer = intel_bufferobj_unmap;
+   functions->NewBufferObject = intel_bufferobj_alloc;
+   functions->DeleteBuffer = intel_bufferobj_free;
+   functions->BufferData = intel_bufferobj_data;
+   functions->BufferSubData = intel_bufferobj_subdata;
+   functions->GetBufferSubData = intel_bufferobj_get_subdata;
+   functions->MapBuffer = intel_bufferobj_map;
+   functions->MapBufferRange = intel_bufferobj_map_range;
+   functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
+   functions->UnmapBuffer = intel_bufferobj_unmap;
+   functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index 0431015631..bf3e08a320 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -48,6 +48,12 @@ struct intel_buffer_object
    struct intel_region *region; /* Is there a zero-copy texture
                                    associated with this (pixel)
                                    buffer object? */
+
+   drm_intel_bo *range_map_bo;
+   void *range_map_buffer;
+   unsigned int range_map_offset;
+   GLsizei range_map_size;
+
    GLboolean mapped_gtt;
 };
 
@@ -60,7 +66,7 @@ dri_bo *intel_bufferobj_buffer(struct intel_context *intel,
 
 /* Hook the bufferobject implementation into mesa: 
  */
-void intel_bufferobj_init(struct intel_context *intel);
+void intelInitBufferObjectFuncs(struct dd_function_table *functions);
 
 
 
@@ -72,10 +78,7 @@ void intel_bufferobj_init(struct intel_context *intel);
 static INLINE struct intel_buffer_object *
 intel_buffer_object(struct gl_buffer_object *obj)
 {
-   if (obj->Name)
-      return (struct intel_buffer_object *) obj;
-   else
-      return NULL;
+   return (struct intel_buffer_object *) obj;
 }
 
 /* Helpers for zerocopy image uploads.  See also intel_regions.h:
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index 44e34330a6..e7357e78c5 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
    /* Do this here, not core Mesa, since this function is called from
     * many places within the driver.
     */
-   if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+   if (ctx->NewState & _NEW_BUFFERS) {
       /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
       _mesa_update_framebuffer(ctx);
       /* this updates the DrawBuffer's Width/Height if it's a FBO */
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index a528d996dc..3dc8653a73 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -68,13 +68,18 @@
 #define PCI_CHIP_G41_G                  0x2E32
 #define PCI_CHIP_B43_G                  0x2E42
 
+#define PCI_CHIP_ILD_G                  0x0042
+#define PCI_CHIP_ILM_G                  0x0046
+
 #define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
 				 devid == PCI_CHIP_I915_GM || \
 				 devid == PCI_CHIP_I945_GM || \
 				 devid == PCI_CHIP_I945_GME || \
 				 devid == PCI_CHIP_I965_GM || \
 				 devid == PCI_CHIP_I965_GME || \
-				 devid == PCI_CHIP_GM45_GM || IS_IGD(devid))
+				 devid == PCI_CHIP_GM45_GM || \
+				 IS_IGD(devid) || \
+				 devid == PCI_CHIP_ILM_G)
 
 #define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \
                                  devid == PCI_CHIP_Q45_G || \
@@ -84,6 +89,10 @@
 #define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)
 #define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
 
+#define IS_ILD(devid)           (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid)           (devid == PCI_CHIP_ILM_G)
+#define IS_IGDNG(devid)           (IS_ILD(devid) || IS_ILM(devid))
+
 #define IS_915(devid)		(devid == PCI_CHIP_I915_G || \
 				 devid == PCI_CHIP_E7221_G || \
 				 devid == PCI_CHIP_I915_GM)
@@ -101,7 +110,8 @@
 				 devid == PCI_CHIP_I965_GM || \
 				 devid == PCI_CHIP_I965_GME || \
 				 devid == PCI_CHIP_I946_GZ || \
-				 IS_G4X(devid))
+				 IS_G4X(devid) || \
+				 IS_IGDNG(devid))
 
 #define IS_9XX(devid)		(IS_915(devid) || \
 				 IS_945(devid) || \
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 19f47632ac..9010b910c7 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -27,25 +27,9 @@
  **************************************************************************/
 
 #include "main/glheader.h"
-#include "main/enums.h"
-#include "main/image.h"
 #include "main/mtypes.h"
-#include "main/arrayobj.h"
-#include "main/attrib.h"
-#include "main/blend.h"
-#include "main/bufferobj.h"
-#include "main/buffers.h"
-#include "main/depth.h"
-#include "main/enable.h"
-#include "main/macros.h"
-#include "main/matrix.h"
-#include "main/polygon.h"
-#include "main/texstate.h"
-#include "main/shaders.h"
-#include "main/stencil.h"
-#include "main/varray.h"
-#include "glapi/dispatch.h"
 #include "swrast/swrast.h"
+#include "drivers/common/meta.h"
 
 #include "intel_context.h"
 #include "intel_blit.h"
@@ -53,240 +37,11 @@
 #include "intel_clear.h"
 #include "intel_fbo.h"
 #include "intel_pixel.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
-#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT |			\
-			      BUFFER_BIT_FRONT_LEFT |			\
-			      BUFFER_BIT_COLOR0 |			\
-			      BUFFER_BIT_COLOR1 |			\
-			      BUFFER_BIT_COLOR2 |			\
-			      BUFFER_BIT_COLOR3 |			\
-			      BUFFER_BIT_COLOR4 |			\
-			      BUFFER_BIT_COLOR5 |			\
-			      BUFFER_BIT_COLOR6 |			\
-			      BUFFER_BIT_COLOR7)
-
-
-/**
- * Per-context one-time init of things for intl_clear_tris().
- * Basically set up a private array object for vertex/color arrays.
- */
-static void
-init_clear(GLcontext *ctx)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct gl_array_object *arraySave = NULL;
-   const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name;
-   const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name;
-
-   /* create new array object */
-   intel->clear.arrayObj = _mesa_new_array_object(ctx, ~0);
-
-   /* save current array object, bind new one */
-   _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj);
-   _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj);
-
-   /* one-time setup of vertex arrays (pos, color) */
-   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
-   _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0);
-   _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), intel->clear.color);
-   _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), intel->clear.vertices);
-   _mesa_Enable(GL_COLOR_ARRAY);
-   _mesa_Enable(GL_VERTEX_ARRAY);
-
-   /* restore original array object */
-   _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave);
-   _mesa_reference_array_object(ctx, &arraySave, NULL);
-
-   /* restore original buffer objects */
-   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer);
-   _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer);
-}
-
-
-
-/**
- * Perform glClear where mask contains only color, depth, and/or stencil.
- *
- * The implementation is based on calling into Mesa to set GL state and
- * performing normal triangle rendering.  The intent of this path is to
- * have as generic a path as possible, so that any driver could make use of
- * it.
- */
-void
-intel_clear_tris(GLcontext *ctx, GLbitfield mask)
-{
-   struct intel_context *intel = intel_context(ctx);
-   GLfloat dst_z;
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-   int i;
-   GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE;
-   GLuint saved_shader_program = 0;
-   unsigned int saved_active_texture;
-   struct gl_array_object *arraySave = NULL;
-
-   if (!intel->clear.arrayObj)
-      init_clear(ctx);
-
-   assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH |
-		    BUFFER_BIT_STENCIL)) == 0);
-
-   _mesa_PushAttrib(GL_COLOR_BUFFER_BIT |
-		    GL_CURRENT_BIT |
-		    GL_DEPTH_BUFFER_BIT |
-		    GL_ENABLE_BIT |
-		    GL_POLYGON_BIT |
-		    GL_STENCIL_BUFFER_BIT |
-		    GL_TRANSFORM_BIT |
-		    GL_CURRENT_BIT);
-   saved_active_texture = ctx->Texture.CurrentUnit;
-
-   /* Disable existing GL state we don't want to apply to a clear. */
-   _mesa_Disable(GL_ALPHA_TEST);
-   _mesa_Disable(GL_BLEND);
-   _mesa_Disable(GL_CULL_FACE);
-   _mesa_Disable(GL_FOG);
-   _mesa_Disable(GL_POLYGON_SMOOTH);
-   _mesa_Disable(GL_POLYGON_STIPPLE);
-   _mesa_Disable(GL_POLYGON_OFFSET_FILL);
-   _mesa_Disable(GL_LIGHTING);
-   _mesa_Disable(GL_CLIP_PLANE0);
-   _mesa_Disable(GL_CLIP_PLANE1);
-   _mesa_Disable(GL_CLIP_PLANE2);
-   _mesa_Disable(GL_CLIP_PLANE3);
-   _mesa_Disable(GL_CLIP_PLANE4);
-   _mesa_Disable(GL_CLIP_PLANE5);
-   _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
-   if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) {
-      saved_fp_enable = GL_TRUE;
-      _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
-   }
-   if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) {
-      saved_vp_enable = GL_TRUE;
-      _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
-   }
-   if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) {
-      saved_shader_program = ctx->Shader.CurrentProgram->Name;
-      _mesa_UseProgramObjectARB(0);
-   }
-
-   if (ctx->Texture._EnabledUnits != 0) {
-      int i;
-
-      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-	 _mesa_ActiveTextureARB(GL_TEXTURE0 + i);
-	 _mesa_Disable(GL_TEXTURE_1D);
-	 _mesa_Disable(GL_TEXTURE_2D);
-	 _mesa_Disable(GL_TEXTURE_3D);
-	 if (ctx->Extensions.ARB_texture_cube_map)
-	    _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB);
-	 if (ctx->Extensions.NV_texture_rectangle)
-	    _mesa_Disable(GL_TEXTURE_RECTANGLE_NV);
-	 if (ctx->Extensions.MESA_texture_array) {
-	    _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT);
-	    _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT);
-	 }
-      }
-   }
-
-   /* save current array object, bind our private one */
-   _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj);
-   _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj);
-
-   intel_meta_set_passthrough_transform(intel);
-
-   for (i = 0; i < 4; i++) {
-      COPY_4FV(intel->clear.color[i], ctx->Color.ClearColor);
-   }
-
-   /* convert clear Z from [0,1] to NDC coord in [-1,1] */
-   dst_z = -1.0 + 2.0 * ctx->Depth.Clear;
-
-   /* Prepare the vertices, which are the same regardless of which buffer we're
-    * drawing to.
-    */
-   intel->clear.vertices[0][0] = fb->_Xmin;
-   intel->clear.vertices[0][1] = fb->_Ymin;
-   intel->clear.vertices[0][2] = dst_z;
-   intel->clear.vertices[1][0] = fb->_Xmax;
-   intel->clear.vertices[1][1] = fb->_Ymin;
-   intel->clear.vertices[1][2] = dst_z;
-   intel->clear.vertices[2][0] = fb->_Xmax;
-   intel->clear.vertices[2][1] = fb->_Ymax;
-   intel->clear.vertices[2][2] = dst_z;
-   intel->clear.vertices[3][0] = fb->_Xmin;
-   intel->clear.vertices[3][1] = fb->_Ymax;
-   intel->clear.vertices[3][2] = dst_z;
-
-   while (mask != 0) {
-      GLuint this_mask = 0;
-      GLuint color_bit;
-
-      color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
-      if (color_bit != 0)
-	 this_mask |= (1 << (color_bit - 1));
-
-      /* Clear depth/stencil in the same pass as color. */
-      this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL));
-
-      /* Select the current color buffer and use the color write mask if
-       * we have one, otherwise don't write any color channels.
-       */
-      if (this_mask & BUFFER_BIT_FRONT_LEFT)
-	 _mesa_DrawBuffer(GL_FRONT_LEFT);
-      else if (this_mask & BUFFER_BIT_BACK_LEFT)
-	 _mesa_DrawBuffer(GL_BACK_LEFT);
-      else if (color_bit != 0)
-	 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 +
-			  (color_bit - BUFFER_COLOR0 - 1));
-      else
-	 _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
-
-      /* Control writing of the depth clear value to depth. */
-      if (this_mask & BUFFER_BIT_DEPTH) {
-	 _mesa_DepthFunc(GL_ALWAYS);
-	 _mesa_Enable(GL_DEPTH_TEST);
-      } else {
-	 _mesa_Disable(GL_DEPTH_TEST);
-	 _mesa_DepthMask(GL_FALSE);
-      }
-
-      /* Control writing of the stencil clear value to stencil. */
-      if (this_mask & BUFFER_BIT_STENCIL) {
-	 _mesa_Enable(GL_STENCIL_TEST);
-	 _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
-				 GL_REPLACE, GL_REPLACE, GL_REPLACE);
-	 _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
-				   ctx->Stencil.Clear,
-				   ctx->Stencil.WriteMask[0]);
-      } else {
-	 _mesa_Disable(GL_STENCIL_TEST);
-      }
-
-      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
-      mask &= ~this_mask;
-   }
-
-   intel_meta_restore_transform(intel);
-
-   _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture);
-   if (saved_fp_enable)
-      _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
-   if (saved_vp_enable)
-      _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
-
-   if (saved_shader_program)
-      _mesa_UseProgramObjectARB(saved_shader_program);
-
-   _mesa_PopAttrib();
-
-   /* restore current array object */
-   _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave);
-   _mesa_reference_array_object(ctx, &arraySave, NULL);
-}
-
 static const char *buffer_names[] = {
    [BUFFER_FRONT_LEFT] = "front",
    [BUFFER_BACK_LEFT] = "back",
@@ -340,7 +95,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
          = intel_get_rb_region(fb, BUFFER_STENCIL);
       if (stencilRegion) {
          /* have hw stencil */
-         if (IS_965(intel->intelScreen->deviceID) ||
+         if (stencilRegion->tiling == I915_TILING_Y ||
 	     (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
 	    /* We have to use the 3D engine if we're clearing a partial mask
 	     * of the stencil buffer, or if we're on a 965 which has a tiled
@@ -357,9 +112,10 @@ intelClear(GLcontext *ctx, GLbitfield mask)
 
    /* HW depth */
    if (mask & BUFFER_BIT_DEPTH) {
+      const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH);
+
       /* clear depth with whatever method is used for stencil (see above) */
-      if (IS_965(intel->intelScreen->deviceID) ||
-	  tri_mask & BUFFER_BIT_STENCIL)
+      if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL)
          tri_mask |= BUFFER_BIT_DEPTH;
       else
          blit_mask |= BUFFER_BIT_DEPTH;
@@ -369,7 +125,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
     * buffer with it.
     */
    if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
-      int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
+      int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR);
       if (color_bit != 0) {
 	 tri_mask |= blit_mask & (1 << (color_bit - 1));
 	 blit_mask &= ~(1 << (color_bit - 1));
@@ -379,14 +135,18 @@ intelClear(GLcontext *ctx, GLbitfield mask)
    /* SW fallback clearing */
    swrast_mask = mask & ~tri_mask & ~blit_mask;
 
-   for (i = 0; i < BUFFER_COUNT; i++) {
-      GLuint bufBit = 1 << i;
-      if ((blit_mask | tri_mask) & bufBit) {
+   {
+      /* look for non-Intel renderbuffers (clear them with swrast) */
+      GLbitfield blit_or_tri = blit_mask | tri_mask;
+      while (blit_or_tri) {
+         GLuint i = _mesa_ffs(blit_or_tri) - 1;
+         GLbitfield bufBit = 1 << i;
          if (!fb->Attachment[i].Renderbuffer->ClassID) {
             blit_mask &= ~bufBit;
             tri_mask &= ~bufBit;
             swrast_mask |= bufBit;
          }
+         blit_or_tri ^= bufBit;
       }
    }
 
@@ -411,7 +171,9 @@ intelClear(GLcontext *ctx, GLbitfield mask)
 	 }
 	 DBG("\n");
       }
-      intel_clear_tris(ctx, tri_mask);
+
+      _mesa_meta_clear(&intel->ctx, tri_mask);
+      intel_batchbuffer_flush(intel->batch);
    }
 
    if (swrast_mask) {
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index e593b236a7..fce42e9c2d 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -38,6 +38,7 @@
 #include "swrast_setup/swrast_setup.h"
 #include "tnl/tnl.h"
 #include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
 
 #include "i830_dri.h"
 
@@ -164,6 +165,12 @@ intelGetString(GLcontext * ctx, GLenum name)
       case PCI_CHIP_B43_G:
          chipset = "Intel(R) B43";
          break;
+      case PCI_CHIP_ILD_G:
+         chipset = "Intel(R) IGDNG_D";
+         break;
+      case PCI_CHIP_ILM_G:
+         chipset = "Intel(R) IGDNG_M";
+         break;
       default:
          chipset = "Unknown Intel Chipset";
          break;
@@ -401,7 +408,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
     if (!driContext->driScreenPriv->dri2.enabled)
 	return;
 
-    if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+    if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
        /* If we're rendering to the fake front buffer, make sure all the pending
 	* drawing has landed on the real front buffer.  Otherwise when we
 	* eventually get to DRI2GetBuffersWithFormat the stale real front
@@ -587,10 +594,15 @@ intelInitDriverFunctions(struct dd_function_table *functions)
    functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
 
    intelInitTextureFuncs(functions);
+   intelInitTextureImageFuncs(functions);
+   intelInitTextureSubImageFuncs(functions);
+   intelInitTextureCopyImageFuncs(functions);
    intelInitStateFuncs(functions);
    intelInitClearFuncs(functions);
    intelInitBufferFuncs(functions);
    intelInitPixelFuncs(functions);
+   intelInitBufferObjectFuncs(functions);
+   intel_init_syncobj_functions(functions);
 }
 
 
@@ -633,6 +645,10 @@ intelInitContext(struct intel_context *intel,
       intel->maxBatchSize = BATCH_SZ;
 
    intel->bufmgr = intelScreen->bufmgr;
+
+   if (0) /* for debug */
+      drm_intel_bufmgr_set_debug(intel->bufmgr, 1);
+
    intel->ttm = intelScreen->ttm;
    if (intel->ttm) {
       int bo_reuse_mode;
@@ -686,7 +702,15 @@ intelInitContext(struct intel_context *intel,
     */
    _mesa_init_point(ctx);
 
+   meta_init_metaops(ctx, &intel->meta);
    ctx->Const.MaxColorAttachments = 4;  /* XXX FBO: review this */
+   if (IS_965(intelScreen->deviceID)) {
+      if (MAX_WIDTH > 8192)
+	 ctx->Const.MaxRenderbufferSize = 8192;
+   } else {
+      if (MAX_WIDTH > 2048)
+	 ctx->Const.MaxRenderbufferSize = 2048;
+   }
 
    /* Initialize the software rasterizer and helper modules. */
    _swrast_CreateContext(ctx);
@@ -698,6 +722,8 @@ intelInitContext(struct intel_context *intel,
    _swrast_allow_pixel_fog(ctx, GL_FALSE);
    _swrast_allow_vertex_fog(ctx, GL_TRUE);
 
+   _mesa_meta_init(ctx);
+
    intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
    intel->hw_stipple = 1;
 
@@ -744,7 +770,6 @@ intelInitContext(struct intel_context *intel,
 
    intel->batch = intel_batchbuffer_alloc(intel);
 
-   intel_bufferobj_init(intel);
    intel_fbo_init(intel);
 
    if (intel->ctx.Mesa_DXTn) {
@@ -754,6 +779,15 @@ intelInitContext(struct intel_context *intel,
    else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) {
       _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
    }
+   intel->use_texture_tiling = driQueryOptionb(&intel->optionCache,
+					       "texture_tiling");
+   if (intel->use_texture_tiling &&
+       !intel->intelScreen->kernel_exec_fencing) {
+      fprintf(stderr, "No kernel support for execution fencing, "
+	      "disabling texture tiling\n");
+      intel->use_texture_tiling = GL_FALSE;
+   }
+   intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
 
    intel->prim.primitive = ~0;
 
@@ -793,8 +827,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 
       INTEL_FIREVERTICES(intel);
 
-      if (intel->clear.arrayObj)
-         _mesa_delete_array_object(&intel->ctx, intel->clear.arrayObj);
+      _mesa_meta_free(&intel->ctx);
+
+      meta_destroy_metaops(&intel->meta);
 
       intel->vtbl.destroy(intel);
 
@@ -817,13 +852,64 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
       intel->first_post_swapbuffers_batch = NULL;
 
       if (release_texture_heaps) {
-         /* This share group is about to go away, free our private
-          * texture object data.
+         /* Nothing is currently done here to free texture heaps;
+          * but we're not using the texture heap utilities, so I
+          * rather think we shouldn't.  I've taken a look, and can't
+          * find any private texture data hanging around anywhere, but
+          * I'm not yet certain there isn't any at all...
           */
-         if (INTEL_DEBUG & DEBUG_TEXTURE)
+         /* if (INTEL_DEBUG & DEBUG_TEXTURE)
             fprintf(stderr, "do something to free texture heaps\n");
+          */
       }
 
+      /* XXX In intelMakeCurrent() below, the context's static regions are 
+       * referenced inside the frame buffer; it's listed as a hack,
+       * with a comment of "XXX FBO temporary fix-ups!", but
+       * as long as it's there, we should release the regions here.
+       * The do/while loop around the block is used to allow the
+       * "continue" statements inside the block to exit the block,
+       * to avoid many layers of "if" constructs.
+       */
+      do {
+         __DRIdrawablePrivate * driDrawPriv = intel->driDrawable;
+         struct intel_framebuffer *intel_fb;
+         struct intel_renderbuffer *irbDepth, *irbStencil;
+         if (!driDrawPriv) {
+            /* We're already detached from the drawable; exit this block. */
+            continue;
+         }
+         intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate;
+         if (!intel_fb) {
+            /* The frame buffer is already gone; exit this block. */
+            continue;
+         }
+         irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+         irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+
+         /* If the regions of the frame buffer still match the regions
+          * of the context, release them.  If they've changed somehow,
+          * leave them alone.
+          */
+         if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL);
+         }
+         if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL);
+         }
+
+         if (irbDepth && irbDepth->region == intel->depth_region) {
+	    intel_renderbuffer_set_region(irbDepth, NULL);
+         }
+         /* Usually, the stencil buffer is the same as the depth buffer;
+          * but they're handled separately in MakeCurrent, so we'll
+          * handle them separately here.
+          */
+         if (irbStencil && irbStencil->region == intel->depth_region) {
+	    intel_renderbuffer_set_region(irbStencil, NULL);
+         }
+      } while (0);
+
       intel_region_release(&intel->front_region);
       intel_region_release(&intel->back_region);
       intel_region_release(&intel->depth_region);
@@ -871,7 +957,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
           if (driDrawPriv != driReadPriv)
               intel_update_renderbuffers(driContextPriv, driReadPriv);
       } else {
-          /* XXX FBO temporary fix-ups! */
+          /* XXX FBO temporary fix-ups!  These are released in 
+           * intelDestroyContext(), above.  Changes here should be
+           * reflected there.
+           */
           /* if the renderbuffers don't have regions, init them from the context */
          struct intel_renderbuffer *irbDepth
             = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
@@ -956,7 +1045,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
    int me = intel->hHWContext;
 
    drmGetLock(intel->driFd, intel->hHWContext, flags);
-   intel->locked = 1;
 
    if (INTEL_DEBUG & DEBUG_LOCK)
       _mesa_printf("%s - got contended lock\n", __progname);
@@ -1013,9 +1101,12 @@ void LOCK_HARDWARE( struct intel_context *intel )
     struct intel_framebuffer *intel_fb = NULL;
     struct intel_renderbuffer *intel_rb = NULL;
 
-    _glthread_LOCK_MUTEX(lockMutex);
-    assert(!intel->locked);
-    intel->locked = 1;
+    intel->locked++;
+    if (intel->locked >= 2)
+       return;
+
+    if (!sPriv->dri2.enabled)
+       _glthread_LOCK_MUTEX(lockMutex);
 
     if (intel->driDrawable) {
        intel_fb = intel->driDrawable->driverPrivate;
@@ -1062,13 +1153,16 @@ void UNLOCK_HARDWARE( struct intel_context *intel )
 {
     __DRIscreen *sPriv = intel->driScreen;
 
-   intel->vtbl.note_unlock( intel );
-   intel->locked = 0;
+   intel->locked--;
+   if (intel->locked > 0)
+      return;
 
-   if (!sPriv->dri2.enabled)
-      DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+   assert(intel->locked == 0);
 
-   _glthread_UNLOCK_MUTEX(lockMutex);
+   if (!sPriv->dri2.enabled) {
+      DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+      _glthread_UNLOCK_MUTEX(lockMutex);
+   }
 
    if (INTEL_DEBUG & DEBUG_LOCK)
       _mesa_printf("%s - unlocked\n", __progname);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index e2b3943fb5..03e7cf39d6 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -33,6 +33,7 @@
 #include "main/mtypes.h"
 #include "main/mm.h"
 #include "texmem.h"
+#include "dri_metaops.h"
 #include "drm.h"
 #include "intel_bufmgr.h"
 
@@ -79,9 +80,19 @@ extern void intelFallback(struct intel_context *intel, GLuint bit,
 
 #define INTEL_MAX_FIXUP 64
 
+struct intel_sync_object {
+   struct gl_sync_object Base;
+
+   /** Batch associated with this sync object */
+   drm_intel_bo *bo;
+};
+
+/**
+ * intel_context is derived from Mesa's context class: GLcontext.
+ */
 struct intel_context
 {
-   GLcontext ctx;               /* the parent class */
+   GLcontext ctx;  /**< base class, must be first field */
 
    struct
    {
@@ -91,7 +102,6 @@ struct intel_context
       void (*new_batch) (struct intel_context * intel);
       void (*emit_invarient_state) (struct intel_context * intel);
       void (*note_fence) (struct intel_context *intel, GLuint fence);
-      void (*note_unlock) (struct intel_context *intel);
       void (*update_texture_state) (struct intel_context * intel);
 
       void (*render_start) (struct intel_context * intel);
@@ -158,19 +168,7 @@ struct intel_context
       void (*debug_batch)(struct intel_context *intel);
    } vtbl;
 
-   struct {
-      struct gl_fragment_program *bitmap_fp;
-      struct gl_vertex_program *passthrough_vp;
-
-      struct gl_fragment_program *saved_fp;
-      GLboolean saved_fp_enable;
-      struct gl_vertex_program *saved_vp;
-      GLboolean saved_vp_enable;
-
-      GLint saved_vp_x, saved_vp_y;
-      GLsizei saved_vp_width, saved_vp_height;
-      GLenum saved_matrix_mode;
-   } meta;
+   struct dri_metaops meta;
 
    GLint refcount;
    GLuint Fallback;
@@ -182,7 +180,6 @@ struct intel_context
    struct intel_region *front_region;
    struct intel_region *back_region;
    struct intel_region *depth_region;
-   GLboolean internal_viewport_call;
 
    /**
     * This value indicates that the kernel memory manager is being used
@@ -216,13 +213,6 @@ struct intel_context
    GLuint ClearColor565;
    GLuint ClearColor8888;
 
-   /* info for intel_clear_tris() */
-   struct
-   {
-      struct gl_array_object *arrayObj;
-      GLfloat vertices[4][3];
-      GLfloat color[4][4];
-   } clear;
 
    /* Offsets of fields within the current vertex:
     */
@@ -304,6 +294,9 @@ struct intel_context
     */
    GLboolean is_front_buffer_reading;
 
+   GLboolean use_texture_tiling;
+   GLboolean use_early_z;
+
    drm_clip_rect_t fboRect;     /**< cliprect for FBO rendering */
 
    int perf_boxes;
@@ -325,7 +318,7 @@ struct intel_context
    __DRIdrawablePrivate *driReadDrawable;
    __DRIscreenPrivate *driScreen;
    intelScreenPrivate *intelScreen;
-   volatile struct drm_i915_sarea *sarea;
+   volatile drm_i915_sarea_t *sarea;
 
    GLuint lastStamp;
 
@@ -483,6 +476,8 @@ extern void intelFlush(GLcontext * ctx);
 
 extern void intelInitDriverFunctions(struct dd_function_table *functions);
 
+void intel_init_syncobj_functions(struct dd_function_table *functions);
+
 
 /* ================================================================
  * intel_state.c:
@@ -558,6 +553,9 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y,
 void intel_update_renderbuffers(__DRIcontext *context,
 				__DRIdrawable *drawable);
 
+void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
+				  uint32_t buffer_id);
+
 /*======================================================================
  * Inline conversion functions.  
  * These are better-typed than the macros used previously:
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 9ec1b4ec2f..2e61c556d8 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -30,10 +30,14 @@
 #include "intel_extensions.h"
 
 
+#define need_GL_ARB_copy_buffer
 #define need_GL_ARB_framebuffer_object
+#define need_GL_ARB_map_buffer_range
 #define need_GL_ARB_occlusion_query
 #define need_GL_ARB_point_parameters
 #define need_GL_ARB_shader_objects
+#define need_GL_ARB_sync
+#define need_GL_ARB_vertex_array_object
 #define need_GL_ARB_vertex_program
 #define need_GL_ARB_vertex_shader
 #define need_GL_ARB_window_pos
@@ -45,9 +49,12 @@
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_framebuffer_object
 #define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_gpu_program_parameters
 #define need_GL_EXT_point_parameters
+#define need_GL_EXT_provoking_vertex
 #define need_GL_EXT_secondary_color
 #define need_GL_EXT_stencil_two_side
+#define need_GL_APPLE_vertex_array_object
 #define need_GL_ATI_separate_stencil
 #define need_GL_ATI_envmap_bumpmap
 #define need_GL_NV_point_sprite
@@ -65,8 +72,13 @@
  * i965_dri.
  */
 static const struct dri_extension card_extensions[] = {
+   { "GL_ARB_copy_buffer",                GL_ARB_copy_buffer_functions },
+   { "GL_ARB_half_float_pixel",           NULL },
+   { "GL_ARB_map_buffer_range",           GL_ARB_map_buffer_range_functions },
    { "GL_ARB_multitexture",               NULL },
    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+   { "GL_ARB_point_sprite",               NULL },
+   { "GL_ARB_sync",                       GL_ARB_sync_functions },
    { "GL_ARB_texture_border_clamp",       NULL },
    { "GL_ARB_texture_cube_map",           NULL },
    { "GL_ARB_texture_env_add",            NULL },
@@ -75,6 +87,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_ARB_texture_env_dot3",           NULL },
    { "GL_ARB_texture_mirrored_repeat",    NULL },
    { "GL_ARB_texture_rectangle",          NULL },
+   { "GL_ARB_vertex_array_object",        GL_ARB_vertex_array_object_functions},
    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
    { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
    { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
@@ -85,7 +98,9 @@ static const struct dri_extension card_extensions[] = {
    { "GL_EXT_blend_subtract",             NULL },
    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+   { "GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions },
    { "GL_EXT_packed_depth_stencil",       NULL },
+   { "GL_EXT_provoking_vertex",           GL_EXT_provoking_vertex_functions },
    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
    { "GL_EXT_stencil_wrap",               NULL },
    { "GL_EXT_texture_edge_clamp",         NULL },
@@ -95,6 +110,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_EXT_texture_lod_bias",           NULL },
    { "GL_3DFX_texture_compression_FXT1",  NULL },
    { "GL_APPLE_client_storage",           NULL },
+   { "GL_APPLE_vertex_array_object",      GL_APPLE_vertex_array_object_functions},
    { "GL_MESA_pack_invert",               NULL },
    { "GL_MESA_ycbcr_texture",             NULL },
    { "GL_NV_blend_square",                NULL },
@@ -112,8 +128,10 @@ static const struct dri_extension i915_extensions[] = {
    { "GL_ARB_fragment_program",           NULL },
    { "GL_ARB_shadow",                     NULL },
    { "GL_ARB_texture_non_power_of_two",   NULL },
+   { "GL_ATI_separate_stencil",           GL_ATI_separate_stencil_functions },
    { "GL_ATI_texture_env_combine3",       NULL },
    { "GL_EXT_shadow_funcs",               NULL },
+   { "GL_EXT_stencil_two_side",           GL_EXT_stencil_two_side_functions },
    { "GL_NV_texture_env_combine4",        NULL },
    { NULL,                                NULL }
 };
@@ -128,6 +146,7 @@ static const struct dri_extension brw_extensions[] = {
    { "GL_ARB_framebuffer_object",         GL_ARB_framebuffer_object_functions},
    { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions },
    { "GL_ARB_point_sprite", 		  NULL },
+   { "GL_ARB_seamless_cube_map",          NULL },
    { "GL_ARB_shader_objects",             GL_ARB_shader_objects_functions },
    { "GL_ARB_shading_language_100",       GL_VERSION_2_0_functions },
    { "GL_ARB_shading_language_120",       GL_VERSION_2_1_functions },
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index ed7c78e06c..804c034840 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -35,6 +35,7 @@
 #include "main/context.h"
 #include "main/texformat.h"
 #include "main/texrender.h"
+#include "drivers/common/meta.h"
 
 #include "intel_context.h"
 #include "intel_buffers.h"
@@ -217,7 +218,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
 	  height, pitch);
 
-      irb->region = intel_region_alloc(intel, cpp, width, height, pitch,
+      irb->region = intel_region_alloc(intel, I915_TILING_NONE,
+				       cpp, width, height, pitch,
 				       GL_TRUE);
       if (!irb->region)
          return GL_FALSE;       /* out of memory? */
@@ -575,9 +577,10 @@ intel_render_texture(GLcontext * ctx,
 
    ASSERT(newImage);
 
-   if (newImage->Border != 0) {
-      /* Fallback on drawing to a texture with a border, which won't have a
-       * miptree.
+   intel_image = intel_texture_image(newImage);
+   if (!intel_image->mt) {
+      /* Fallback on drawing to a texture that doesn't have a miptree
+       * (has a border, width/height 0, etc.)
        */
       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
       _mesa_render_texture(ctx, fb, att);
@@ -608,7 +611,6 @@ intel_render_texture(GLcontext * ctx,
        irb->Base.RefCount);
 
    /* point the renderbufer's region to the texture image region */
-   intel_image = intel_texture_image(newImage);
    if (irb->region != intel_image->mt->region) {
       if (irb->region)
 	 intel_region_release(&irb->region);
@@ -699,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
 
 
 /**
- * Called from glBlitFramebuffer().
- * For now, we're doing an approximation with glCopyPixels().
- * XXX we need to bypass all the per-fragment operations, except scissor.
- */
-static void
-intel_blit_framebuffer(GLcontext *ctx,
-                       GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
-                       GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
-                       GLbitfield mask, GLenum filter)
-{
-   const GLfloat xZoomSave = ctx->Pixel.ZoomX;
-   const GLfloat yZoomSave = ctx->Pixel.ZoomY;
-   GLsizei width, height;
-   GLfloat xFlip = 1.0F, yFlip = 1.0F;
-
-   if (srcX1 < srcX0) {
-      GLint tmp = srcX1;
-      srcX1 = srcX0;
-      srcX0 = tmp;
-      xFlip = -1.0F;
-   }
-
-   if (srcY1 < srcY0) {
-      GLint tmp = srcY1;
-      srcY1 = srcY0;
-      srcY0 = tmp;
-      yFlip = -1.0F;
-   }
-
-   width = srcX1 - srcX0;
-   height = srcY1 - srcY0;
-
-   ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0);
-   ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0);
-
-   if (ctx->Pixel.ZoomX < 0.0F) {
-      dstX0 = MAX2(dstX0, dstX1);
-   }
-   else {
-      dstX0 = MIN2(dstX0, dstX1);
-   }
-
-   if (ctx->Pixel.ZoomY < 0.0F) {
-      dstY0 = MAX2(dstY0, dstY1);
-   }
-   else {
-      dstY0 = MIN2(dstY0, dstY1);
-   }
-
-   if (mask & GL_COLOR_BUFFER_BIT) {
-      ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
-                             dstX0, dstY0, GL_COLOR);
-   }
-   if (mask & GL_DEPTH_BUFFER_BIT) {
-      ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
-                             dstX0, dstY0, GL_DEPTH);
-   }
-   if (mask & GL_STENCIL_BUFFER_BIT) {
-      ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
-                             dstX0, dstY0, GL_STENCIL);
-   }
-      
-   ctx->Pixel.ZoomX = xZoomSave;
-   ctx->Pixel.ZoomY = yZoomSave;
-}
-
-
-/**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
  */
@@ -781,5 +715,5 @@ intel_fbo_init(struct intel_context *intel)
    intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
    intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
    intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
-   intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
+   intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c
new file mode 100644
index 0000000000..12059e122c
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/buffers.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/depth.h"
+#include "main/hash.h"
+#include "main/mipmap.h"
+#include "main/blend.h"
+#include "glapi/dispatch.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+
+static const char *intel_fp_tex2d =	/* ARB fragment program source */
+      "!!ARBfp1.0\n"
+      "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n"
+      "END\n";
+
+static GLboolean
+intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name,
+			    int level, int width, int height)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLfloat vertices[4][2];
+   GLint status;
+
+   /* Clamp base and max level to level - 1 so that is the level sampled. */
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1);
+
+   /* Attach `level` of tex_name as color attachment 0 of the bound FBO. */
+   _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+				 GL_COLOR_ATTACHMENT0_EXT,
+				 GL_TEXTURE_2D,
+				 tex_name,
+				 level);
+   /* Direct rendering at that color attachment. */
+   _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+   status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT);
+   if (status != GL_FRAMEBUFFER_COMPLETE_EXT)
+      return GL_FALSE;	/* this level cannot be rendered to */
+
+   meta_set_passthrough_transform(&intel->meta);
+
+   /* Quad of width x height units, rewritten for every level.  XXX: doing it
+    * right would use a 0-1 mapping transform and constant vertex data.
+    */
+   vertices[0][0] = 0;
+   vertices[0][1] = 0;
+   vertices[1][0] = width;
+   vertices[1][1] = 0;
+   vertices[2][0] = width;
+   vertices[2][1] = height;
+   vertices[3][0] = 0;
+   vertices[3][1] = height;
+
+   _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
+   _mesa_Enable(GL_VERTEX_ARRAY);
+   meta_set_default_texrect(&intel->meta);
+
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   meta_restore_texcoords(&intel->meta);
+   meta_restore_transform(&intel->meta);
+
+   return GL_TRUE;
+}
+
+static GLboolean
+intel_generate_mipmap_2d(GLcontext *ctx,
+			 GLenum target,
+			 struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLint old_active_texture;
+   int level, max_levels, start_level, end_level;
+   GLuint fb_name;
+   GLboolean success = GL_FALSE;	/* stays GL_FALSE on any "goto fail" */
+   struct gl_framebuffer *saved_fbo = NULL;
+   struct gl_buffer_object *saved_array_buffer = NULL;
+   struct gl_buffer_object *saved_element_buffer = NULL;
+
+   _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+		    GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT |
+		    GL_DEPTH_BUFFER_BIT);
+   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+   old_active_texture = ctx->Texture.CurrentUnit;
+   _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer);
+
+   /* save the bound array/index buffers, then bind the shared null buffer */
+   _mesa_reference_buffer_object(ctx, &saved_array_buffer,
+                                 ctx->Array.ArrayBufferObj);
+   _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
+                                 ctx->Shared->NullBufferObj);   
+   _mesa_reference_buffer_object(ctx, &saved_element_buffer,
+                                 ctx->Array.ElementArrayBufferObj);
+   _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
+                                 ctx->Shared->NullBufferObj);   
+
+   _mesa_Disable(GL_POLYGON_STIPPLE);
+   _mesa_Disable(GL_DEPTH_TEST);
+   _mesa_Disable(GL_STENCIL_TEST);
+   _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+   _mesa_DepthMask(GL_FALSE);
+
+   /* Bind the given texture to GL_TEXTURE_2D on unit 0, with linear filtering
+    * for our minification.
+    */
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+   _mesa_Enable(GL_TEXTURE_2D);
+   _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
+		       GL_LINEAR_MIPMAP_NEAREST);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+   /* Create and bind a scratch FBO; each level is attached to it in turn. */
+   _mesa_GenFramebuffersEXT(1, &fb_name);
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
+
+   meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp,
+			     intel_fp_tex2d);
+   meta_set_passthrough_vertex_program(&intel->meta);
+
+   max_levels = _mesa_max_texture_levels(ctx, texObj->Target);
+   start_level = texObj->BaseLevel;
+   end_level = texObj->MaxLevel;
+
+   /* Loop generating level+1 from level. */
+   for (level = start_level; level < end_level && level < max_levels - 1; level++) {
+      const struct gl_texture_image *srcImage;
+      int width, height;
+
+      srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      if (srcImage->Border != 0)
+	 goto fail;	/* bordered source image: report failure */
+
+      width = srcImage->Width / 2;
+      if (width < 1)
+	 width = 1;
+      height = srcImage->Height / 2;
+      if (height < 1)
+	 height = 1;
+
+      if (width == srcImage->Width &&
+	  height == srcImage->Height) {
+	 /* Both dimensions are already 1, so this was the smallest level.
+	  * Neither _mesa_max_texture_levels nor texObj->MaxLevel gives the
+	  * object's real last level, so the loop bound alone can't stop us.
+	  */
+	 break;
+      }
+
+      /* Make sure that there's space allocated for the target level.
+       * We could skip this if there's already space allocated and save some
+       * time.
+       */
+      _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat,
+		       width, height, 0,
+		       GL_RGBA, GL_UNSIGNED_INT, NULL);
+
+      if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1,
+				       width, height))
+	 goto fail;
+   }
+
+   success = GL_TRUE;
+
+fail:
+   meta_restore_fragment_program(&intel->meta);
+   meta_restore_vertex_program(&intel->meta);
+
+   /* restore array/index buffers and drop the references saved above */
+   _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
+                                 saved_array_buffer);
+   _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL);
+   _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
+                                 saved_element_buffer);
+   _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL);
+
+
+   _mesa_DeleteFramebuffersEXT(1, &fb_name);
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+   if (saved_fbo)
+      _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name);
+   _mesa_reference_framebuffer(&saved_fbo, NULL);
+   _mesa_PopClientAttrib();
+   _mesa_PopAttrib();
+
+   return success;
+}
+
+
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level).
+ *
+ * GL_TEXTURE_2D targets first try the render-to-texture path, provided the
+ * FBO and ARB vertex/fragment program extensions are enabled; the texture is
+ * unlocked around that attempt.  All other targets, and any failure of that
+ * path, go through _mesa_generate_mipmap() with the base level mapped.
+ *
+ * It would be really nice if Mesa called this itself whenever mipmaps need
+ * regenerating, instead of each texture image modification path doing so.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+                      struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   int face, i;
+
+   /* HW path */
+   if (target == GL_TEXTURE_2D &&
+       ctx->Extensions.EXT_framebuffer_object &&
+       ctx->Extensions.ARB_fragment_program &&
+       ctx->Extensions.ARB_vertex_program) {
+      GLboolean success;
+
+      /* We'll be accessing this texture using GL entrypoints, which should
+       * be resilient against other access to this texture.
+       */
+      _mesa_unlock_texture(ctx, texObj);
+      success = intel_generate_mipmap_2d(ctx, target, texObj);
+      _mesa_lock_texture(ctx, texObj);
+
+      if (success)
+	 return;
+   }
+
+   /* SW path */
+   intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+   _mesa_generate_mipmap(ctx, target, texObj);
+   intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+
+   /* Set level/face in our private data for the new images, since no normal
+    * TexImage path did.  Level MaxLevel can itself be generated (see the loop
+    * in intel_generate_mipmap_2d), hence "<="; the MAX_TEXTURE_LEVELS test
+    * keeps i inside Image[face][] when MaxLevel is large. */
+   for (face = 0; face < nr_faces; face++) {
+      for (i = texObj->BaseLevel + 1;
+	   i <= texObj->MaxLevel && i < MAX_TEXTURE_LEVELS; i++) {
+	 struct intel_texture_image *intelImage =
+	    intel_texture_image(texObj->Image[face][i]);
+	 if (intelImage == NULL)
+	    break;
+	 intelImage->level = i;
+	 intelImage->face = face;
+	 /* Unreference the miptree to signal that the new Data is a bare
+	  * pointer from mesa.
+	  */
+	 intel_miptree_release(intel, &intelImage->mt);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 6e1e034e53..c985da5aa2 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -57,14 +57,16 @@ intel_miptree_create_internal(struct intel_context *intel,
 			      GLuint last_level,
 			      GLuint width0,
 			      GLuint height0,
-			      GLuint depth0, GLuint cpp, GLuint compress_byte)
+			      GLuint depth0, GLuint cpp, GLuint compress_byte,
+			      uint32_t tiling)
 {
    GLboolean ok;
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 
-   DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
+   DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target),
-       _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+       _mesa_lookup_enum_by_nr(internal_format), 
+       first_level, last_level, mt);
 
    mt->target = target_to_target(target);
    mt->internal_format = internal_format;
@@ -80,15 +82,16 @@ intel_miptree_create_internal(struct intel_context *intel,
 
 #ifdef I915
    if (IS_945(intel->intelScreen->deviceID))
-      ok = i945_miptree_layout(intel, mt);
+      ok = i945_miptree_layout(intel, mt, tiling);
    else
-      ok = i915_miptree_layout(intel, mt);
+      ok = i915_miptree_layout(intel, mt, tiling);
 #else
-   ok = brw_miptree_layout(intel, mt);
+   ok = brw_miptree_layout(intel, mt, tiling);
 #endif
 
    if (!ok) {
       free(mt);
+      DBG("%s not okay - returning NULL\n", __FUNCTION__);
       return NULL;
    }
 
@@ -98,6 +101,7 @@ intel_miptree_create_internal(struct intel_context *intel,
 struct intel_mipmap_tree *
 intel_miptree_create(struct intel_context *intel,
 		     GLenum target,
+		     GLenum base_format,
 		     GLenum internal_format,
 		     GLuint first_level,
 		     GLuint last_level,
@@ -107,10 +111,23 @@ intel_miptree_create(struct intel_context *intel,
 		     GLboolean expect_accelerated_upload)
 {
    struct intel_mipmap_tree *mt;
+   uint32_t tiling;
+
+   if (intel->use_texture_tiling && compress_byte == 0 &&
+       intel->intelScreen->kernel_exec_fencing) {
+      if (IS_965(intel->intelScreen->deviceID) &&
+	  (base_format == GL_DEPTH_COMPONENT ||
+	   base_format == GL_DEPTH_STENCIL_EXT))
+	 tiling = I915_TILING_Y;
+      else
+	 tiling = I915_TILING_X;
+   } else
+      tiling = I915_TILING_NONE;
 
    mt = intel_miptree_create_internal(intel, target, internal_format,
 				      first_level, last_level, width0,
-				      height0, depth0, cpp, compress_byte);
+				      height0, depth0, cpp, compress_byte,
+				      tiling);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -118,6 +135,7 @@ intel_miptree_create(struct intel_context *intel,
       return NULL;
 
    mt->region = intel_region_alloc(intel,
+				   tiling,
 				   mt->cpp,
 				   mt->pitch,
 				   mt->total_height,
@@ -147,7 +165,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
    mt = intel_miptree_create_internal(intel, target, internal_format,
 				      first_level, last_level,
 				      region->width, region->height, 1,
-				      region->cpp, compress_byte);
+				      region->cpp, compress_byte,
+				      I915_TILING_NONE);
    if (!mt)
       return mt;
 #if 0
@@ -185,6 +204,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
 
 int intel_miptree_pitch_align (struct intel_context *intel,
 			       struct intel_mipmap_tree *mt,
+			       uint32_t tiling,
 			       int pitch)
 {
 #ifdef I915
@@ -205,6 +225,11 @@ int intel_miptree_pitch_align (struct intel_context *intel,
 	 pitch_align = 4;
       }
 
+      if (tiling == I915_TILING_X)
+	 pitch_align = 512;
+      else if (tiling == I915_TILING_Y)
+	 pitch_align = 128;
+
       pitch = ALIGN(pitch * mt->cpp, pitch_align);
 
 #ifdef I915
@@ -328,23 +353,31 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 }
 
 
-
 void
-intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-			       GLuint level, GLuint img,
-			       GLuint x, GLuint y)
+intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
+                                  GLuint level, GLuint img,
+                                  GLuint x, GLuint y, 
+                                  GLuint offset)
 {
    if (img == 0 && level == 0)
       assert(x == 0 && y == 0);
 
    assert(img < mt->level[level].nr_images);
 
-   mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
+   mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp + offset;
 
    DBG("%s level %d img %d pos %d,%d image_offset %x\n",
        __FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]);
 }
 
+void
+intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+			       GLuint level, GLuint img,
+			       GLuint x, GLuint y)
+{
+    intel_miptree_set_image_offset_ex(mt, level, img, x, y, 0);
+}
+
 
 /* Although we use the image_offset[] array to store relative offsets
  * to cube faces, Mesa doesn't know anything about this and expects
@@ -454,11 +487,11 @@ intel_miptree_image_data(struct intel_context *intel,
 			0, 0,                             /* source x, y */
 			dst->level[level].width, height); /* width, height */
 
-      src += src_image_pitch * dst->cpp;
+      src = (char *)src + src_image_pitch * dst->cpp;
    }
 }
 
-extern GLuint intel_compressed_alignment(GLenum);
+extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
 /* Copy mipmap image between trees
  */
 void
@@ -475,20 +508,37 @@ intel_miptree_image_copy(struct intel_context *intel,
    const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
    const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level);
    GLuint i;
+   GLboolean success;
 
    if (dst->compressed) {
-       GLuint alignment = intel_compressed_alignment(dst->internal_format);
+       GLuint align_w, align_h;
+
+       intel_get_texture_alignment_unit(dst->internal_format, &align_w, &align_h);
        height = (height + 3) / 4;
-       width = ((width + alignment - 1) & ~(alignment - 1));
+       width = ALIGN(width, align_w);
    }
 
    for (i = 0; i < depth; i++) {
-      intel_region_copy(intel,
-                        dst->region, dst_offset + dst_depth_offset[i],
-                        0,
-                        0,
-                        src->region, src_offset + src_depth_offset[i],
-                        0, 0, width, height);
+      success = intel_region_copy(intel,
+				  dst->region, dst_offset + dst_depth_offset[i],
+				  0, 0,
+				  src->region, src_offset + src_depth_offset[i],
+				  0, 0, width, height, GL_COPY);
+      if (!success) {
+	 GLubyte *src_ptr, *dst_ptr;
+
+	 src_ptr = intel_region_map(intel, src->region);
+	 dst_ptr = intel_region_map(intel, dst->region);
+
+	 _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i],
+			 dst->cpp,
+			 dst->pitch,
+			 0, 0, width, height,
+			 src_ptr + src_offset + src_depth_offset[i],
+			 src->pitch,
+			 0, 0);
+	 intel_region_unmap(intel, src->region);
+	 intel_region_unmap(intel, dst->region);
+      }
    }
-
 }
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 4060b9df78..c890b2a0d0 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -126,6 +126,7 @@ struct intel_mipmap_tree
 
 struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLenum target,
+                                               GLenum base_format,
                                                GLenum internal_format,
                                                GLuint first_level,
                                                GLuint last_level,
@@ -148,6 +149,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
 
 int intel_miptree_pitch_align (struct intel_context *intel,
 			       struct intel_mipmap_tree *mt,
+			       uint32_t tiling,
 			       int pitch);
 
 void intel_miptree_reference(struct intel_mipmap_tree **dst,
@@ -194,6 +196,11 @@ void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
                                   GLuint x, GLuint y,
                                   GLuint w, GLuint h, GLuint d);
 
+void intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
+                                       GLuint level,
+                                       GLuint img, GLuint x, GLuint y,
+                                       GLuint offset);
+
 void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
                                     GLuint level,
                                     GLuint img, GLuint x, GLuint y);
@@ -218,10 +225,13 @@ void intel_miptree_image_copy(struct intel_context *intel,
 /* i915_mipmap_tree.c:
  */
 GLboolean i915_miptree_layout(struct intel_context *intel,
-			      struct intel_mipmap_tree *mt);
+			      struct intel_mipmap_tree *mt,
+			      uint32_t tiling);
 GLboolean i945_miptree_layout(struct intel_context *intel,
-			      struct intel_mipmap_tree *mt);
+			      struct intel_mipmap_tree *mt,
+			      uint32_t tiling);
 GLboolean brw_miptree_layout(struct intel_context *intel,
-			     struct intel_mipmap_tree *mt);
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
index defb80f85d..a300141655 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -27,9 +27,12 @@
 
 #include "main/enums.h"
 #include "main/state.h"
+#include "main/bufferobj.h"
 #include "main/context.h"
 #include "main/enable.h"
 #include "main/matrix.h"
+#include "main/texstate.h"
+#include "main/varray.h"
 #include "main/viewport.h"
 #include "swrast/swrast.h"
 #include "shader/arbprogram.h"
@@ -174,167 +177,6 @@ intel_check_blit_format(struct intel_region * region,
 }
 
 void
-intel_meta_set_passthrough_transform(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-
-   intel->meta.saved_vp_x = ctx->Viewport.X;
-   intel->meta.saved_vp_y = ctx->Viewport.Y;
-   intel->meta.saved_vp_width = ctx->Viewport.Width;
-   intel->meta.saved_vp_height = ctx->Viewport.Height;
-   intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode;
-
-   intel->internal_viewport_call = GL_TRUE;
-   _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
-   intel->internal_viewport_call = GL_FALSE;
-
-   _mesa_MatrixMode(GL_PROJECTION);
-   _mesa_PushMatrix();
-   _mesa_LoadIdentity();
-   _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1);
-
-   _mesa_MatrixMode(GL_MODELVIEW);
-   _mesa_PushMatrix();
-   _mesa_LoadIdentity();
-}
-
-void
-intel_meta_restore_transform(struct intel_context *intel)
-{
-   _mesa_MatrixMode(GL_PROJECTION);
-   _mesa_PopMatrix();
-   _mesa_MatrixMode(GL_MODELVIEW);
-   _mesa_PopMatrix();
-
-   _mesa_MatrixMode(intel->meta.saved_matrix_mode);
-
-   intel->internal_viewport_call = GL_TRUE;
-   _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y,
-		  intel->meta.saved_vp_width, intel->meta.saved_vp_height);
-   intel->internal_viewport_call = GL_FALSE;
-}
-
-/**
- * Set up a vertex program to pass through the position and first texcoord
- * for pixel path.
- */
-void
-intel_meta_set_passthrough_vertex_program(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-   static const char *vp =
-      "!!ARBvp1.0\n"
-      "TEMP vertexClip;\n"
-      "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n"
-      "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n"
-      "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n"
-      "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n"
-      "MOV result.position, vertexClip;\n"
-      "MOV result.texcoord[0], vertex.texcoord[0];\n"
-      "MOV result.color, vertex.color;\n"
-      "END\n";
-
-   assert(intel->meta.saved_vp == NULL);
-
-   _mesa_reference_vertprog(ctx, &intel->meta.saved_vp,
-			    ctx->VertexProgram.Current);
-   if (intel->meta.passthrough_vp == NULL) {
-      GLuint prog_name;
-      _mesa_GenPrograms(1, &prog_name);
-      _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name);
-      _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB,
-			     GL_PROGRAM_FORMAT_ASCII_ARB,
-			     strlen(vp), (const GLubyte *)vp);
-      _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp,
-			       ctx->VertexProgram.Current);
-      _mesa_DeletePrograms(1, &prog_name);
-   }
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-   _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
-			    intel->meta.passthrough_vp);
-   ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
-			   &intel->meta.passthrough_vp->Base);
-
-   intel->meta.saved_vp_enable = ctx->VertexProgram.Enabled;
-   _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
-}
-
-/**
- * Restores the previous vertex program after
- * intel_meta_set_passthrough_vertex_program()
- */
-void
-intel_meta_restore_vertex_program(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-   _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
-			    intel->meta.saved_vp);
-   _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, NULL);
-   ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
-			   &ctx->VertexProgram.Current->Base);
-
-   if (!intel->meta.saved_vp_enable)
-      _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
-}
-
-/**
- * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the
- * program object.
- */
-void
-intel_meta_set_fragment_program(struct intel_context *intel,
-				struct gl_fragment_program **prog,
-				const char *prog_string)
-{
-   GLcontext *ctx = &intel->ctx;
-   assert(intel->meta.saved_fp == NULL);
-
-   _mesa_reference_fragprog(ctx, &intel->meta.saved_fp,
-			    ctx->FragmentProgram.Current);
-   if (*prog == NULL) {
-      GLuint prog_name;
-      _mesa_GenPrograms(1, &prog_name);
-      _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name);
-      _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB,
-			     GL_PROGRAM_FORMAT_ASCII_ARB,
-			     strlen(prog_string), (const GLubyte *)prog_string);
-      _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current);
-      /* Note that DeletePrograms unbinds the program on us */
-      _mesa_DeletePrograms(1, &prog_name);
-   }
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-   _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog);
-   ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base));
-
-   intel->meta.saved_fp_enable = ctx->FragmentProgram.Enabled;
-   _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
-}
-
-/**
- * Restores the previous fragment program after
- * intel_meta_set_fragment_program()
- */
-void
-intel_meta_restore_fragment_program(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-   _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
-			    intel->meta.saved_fp);
-   _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, NULL);
-   ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
-			   &ctx->FragmentProgram.Current->Base);
-
-   if (!intel->meta.saved_fp_enable)
-      _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
-}
-
-void
 intelInitPixelFuncs(struct dd_function_table *functions)
 {
    functions->Accum = _swrast_Accum;
@@ -346,12 +188,3 @@ intelInitPixelFuncs(struct dd_function_table *functions)
    functions->ReadPixels = intelReadPixels;
 }
 
-void
-intel_free_pixel_state(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-
-   _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL);
-   _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL);
-}
-
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
index cb41fa182c..96a6dd17b2 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.h
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
@@ -31,16 +31,6 @@
 #include "main/mtypes.h"
 
 void intelInitPixelFuncs(struct dd_function_table *functions);
-void intel_meta_set_passthrough_transform(struct intel_context *intel);
-void intel_meta_restore_transform(struct intel_context *intel);
-void intel_meta_set_passthrough_vertex_program(struct intel_context *intel);
-void intel_meta_restore_vertex_program(struct intel_context *intel);
-void intel_meta_set_fragment_program(struct intel_context *intel,
-				     struct gl_fragment_program **prog,
-				     const char *prog_string);
-void intel_meta_restore_fragment_program(struct intel_context *intel);
-void intel_free_pixel_state(struct intel_context *intel);
-
 GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
 					GLboolean src_alpha_is_one);
 
@@ -76,6 +66,4 @@ void intelBitmap(GLcontext * ctx,
 		 const struct gl_pixelstore_attrib *unpack,
 		 const GLubyte * pixels);
 
-void intel_clear_tris(GLcontext *ctx, GLbitfield mask);
-
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index d137aef13d..b543a0bbc3 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -42,6 +42,7 @@
 #include "main/varray.h"
 #include "main/attrib.h"
 #include "main/enable.h"
+#include "main/viewport.h"
 #include "shader/arbprogram.h"
 #include "glapi/dispatch.h"
 #include "swrast/swrast.h"
@@ -92,19 +93,12 @@ static const GLubyte *map_pbo( GLcontext *ctx,
    return ADD_POINTERS(buf, bitmap);
 }
 
-static GLboolean test_bit( const GLubyte *src,
-			    GLuint bit )
+static GLboolean test_bit( const GLubyte *src, GLuint bit )
 {
    return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
 }
 
-static GLboolean test_msb_bit(const GLubyte *src, GLuint bit)
-{
-   return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0;
-}
-
-static void set_bit( GLubyte *dest,
-			  GLuint bit )
+static void set_bit( GLubyte *dest, GLuint bit )
 {
    dest[bit/8] |= 1 << (bit % 8);
 }
@@ -194,7 +188,7 @@ do_blit_bitmap( GLcontext *ctx,
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLfloat tmpColor[4];
    GLubyte ubcolor[4];
-   GLuint color8888, color565;
+   GLuint color;
    unsigned int num_cliprects;
    drm_clip_rect_t *cliprects;
    int x_off, y_off;
@@ -232,8 +226,11 @@ do_blit_bitmap( GLcontext *ctx,
    UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
    UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
 
-   color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]);
-   color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
+   if (dst->cpp == 2)
+      color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
+   else
+      color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1],
+				  ubcolor[2], ubcolor[3]);
 
    if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
       return GL_FALSE;
@@ -307,21 +304,21 @@ do_blit_bitmap( GLcontext *ctx,
 				   fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
 		  continue;
 
-	       /* 
-		*/
-	       intelEmitImmediateColorExpandBlit( intel,
-						  dst->cpp,
-						  (GLubyte *)stipple, 
-						  sz,
-						  (dst->cpp == 2) ? color565 : color8888,
-						  dst->pitch,
-						  dst->buffer,
-						  0,
-						  dst->tiling,
-						  box_x + px,
-						  box_y + py,
-						  w, h,
-						  logic_op);
+	       if (!intelEmitImmediateColorExpandBlit(intel,
+						      dst->cpp,
+						      (GLubyte *)stipple,
+						      sz,
+						      color,
+						      dst->pitch,
+						      dst->buffer,
+						      0,
+						      dst->tiling,
+						      box_x + px,
+						      box_y + py,
+						      w, h,
+						      logic_op)) {
+		  return GL_FALSE;
+	       }
 	    } 
 	 } 
       }
@@ -360,11 +357,8 @@ intel_texture_bitmap(GLcontext * ctx,
       "END\n";
    GLuint texname;
    GLfloat vertices[4][4];
-   GLfloat texcoords[4][2];
    GLint old_active_texture;
-   GLubyte *unpacked_bitmap;
    GLubyte *a8_bitmap;
-   int x, y;
    GLfloat dst_z;
 
    /* We need a fragment program for the KIL effect */
@@ -424,22 +418,16 @@ intel_texture_bitmap(GLcontext * ctx,
       return GL_FALSE;
    }
 
-   /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
    if (unpack->BufferObj->Name) {
       bitmap = map_pbo(ctx, width, height, unpack, bitmap);
       if (bitmap == NULL)
 	 return GL_TRUE;	/* even though this is an error, we're done */
    }
-   unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap,
-					 unpack);
+
+   /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
    a8_bitmap = _mesa_calloc(width * height);
-   for (y = 0; y < height; y++) {
-      for (x = 0; x < width; x++) {
-	 if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x))
-	    a8_bitmap[y * width + x] = 0xff;
-      }
-   }
-   _mesa_free(unpacked_bitmap);
+   _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
+
    if (unpack->BufferObj->Name) {
       /* done with PBO so unmap it now */
       ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
@@ -473,15 +461,18 @@ intel_texture_bitmap(GLcontext * ctx,
 		    GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap);
    _mesa_free(a8_bitmap);
 
-   intel_meta_set_fragment_program(intel, &intel->meta.bitmap_fp, fp);
+   meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp);
    _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
 				     ctx->Current.RasterColor);
-   intel_meta_set_passthrough_vertex_program(intel);
-   intel_meta_set_passthrough_transform(intel);
+   meta_set_passthrough_vertex_program(&intel->meta);
+   meta_set_passthrough_transform(&intel->meta);
 
    /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
    dst_z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
 
+   /* RasterPos[2] already takes into account the DepthRange mapping. */
+   _mesa_DepthRange(0.0, 1.0);
+
    vertices[0][0] = dst_x;
    vertices[0][1] = dst_y;
    vertices[0][2] = dst_z;
@@ -499,25 +490,15 @@ intel_texture_bitmap(GLcontext * ctx,
    vertices[3][2] = dst_z;
    vertices[3][3] = 1.0;
 
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
-
    _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
    _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+   meta_set_default_texrect(&intel->meta);
    _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
-   intel_meta_restore_transform(intel);
-   intel_meta_restore_fragment_program(intel);
-   intel_meta_restore_vertex_program(intel);
+   meta_restore_texcoords(&intel->meta);
+   meta_restore_transform(&intel->meta);
+   meta_restore_fragment_program(&intel->meta);
+   meta_restore_vertex_program(&intel->meta);
 
    _mesa_PopClientAttrib();
    _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index d50dd68092..07ca8f7ddb 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -26,18 +26,13 @@
  **************************************************************************/
 
 #include "main/glheader.h"
-#include "main/enums.h"
 #include "main/image.h"
 #include "main/state.h"
 #include "main/mtypes.h"
-#include "main/macros.h"
-#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
 
-#include "intel_screen.h"
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
 #include "intel_buffers.h"
-#include "intel_blit.h"
 #include "intel_regions.h"
 #include "intel_pixel.h"
 
@@ -97,162 +92,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
 	    ctx->Color.BlendEnabled);
 }
 
-#ifdef I915
-/* Doesn't work for overlapping regions.  Could do a double copy or
- * just fallback.
- */
-static GLboolean
-do_texture_copypixels(GLcontext * ctx,
-                      GLint srcx, GLint srcy,
-                      GLsizei width, GLsizei height,
-                      GLint dstx, GLint dsty, GLenum type)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   struct intel_region *src = copypix_src_region(intel, type);
-   GLenum src_format;
-   GLenum src_type;
-
-   DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, 
-       srcx, srcy, width, height, dstx, dsty);
-
-   if (!src || !dst || type != GL_COLOR)
-      return GL_FALSE;
-
-   if (ctx->_ImageTransferState) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   /* Can't handle overlapping regions.  Don't have sufficient control
-    * over rasterization to pull it off in-place.  Punt on these for
-    * now.
-    * 
-    * XXX: do a copy to a temporary. 
-    */
-   if (src->buffer == dst->buffer) {
-      drm_clip_rect_t srcbox;
-      drm_clip_rect_t dstbox;
-      drm_clip_rect_t tmp;
-
-      srcbox.x1 = srcx;
-      srcbox.y1 = srcy;
-      srcbox.x2 = srcx + width;
-      srcbox.y2 = srcy + height;
-
-      if (ctx->Pixel.ZoomX > 0) {
-	 dstbox.x1 = dstx;
-	 dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
-      } else {
-	 dstbox.x1 = dstx + width * ctx->Pixel.ZoomX;
-	 dstbox.x2 = dstx;
-      }
-      if (ctx->Pixel.ZoomY > 0) {
-	 dstbox.y1 = dsty;
-	 dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
-      } else {
-	 dstbox.y1 = dsty + height * ctx->Pixel.ZoomY;
-	 dstbox.y2 = dsty;
-      }
-
-      DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
-      DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
-	  width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
-
-      if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
-         DBG("%s: regions overlap\n", __FUNCTION__);
-         return GL_FALSE;
-      }
-   }
-
-   intelFlush(&intel->ctx);
-
-   intel->vtbl.install_meta_state(intel);
-
-   /* Is this true?  Also will need to turn depth testing on according
-    * to state:
-    */
-   intel->vtbl.meta_no_stencil_write(intel);
-   intel->vtbl.meta_no_depth_write(intel);
-
-   /* Set the 3d engine to draw into the destination region:
-    */
-   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
-   intel->vtbl.meta_import_pixel_state(intel);
-
-   if (src->cpp == 2) {
-      src_format = GL_RGB;
-      src_type = GL_UNSIGNED_SHORT_5_6_5;
-   }
-   else {
-      src_format = GL_BGRA;
-      src_type = GL_UNSIGNED_BYTE;
-   }
-
-   /* Set the frontbuffer up as a large rectangular texture.
-    */
-   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
-                                         src->pitch,
-                                         src->height, src_format, src_type)) {
-      intel->vtbl.leave_meta_state(intel);
-      return GL_FALSE;
-   }
-
-
-   intel->vtbl.meta_texture_blend_replace(intel);
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-
-      srcy = dPriv->h - srcy - height;  /* convert from gl to hardware coords */
-
-      srcx += dPriv->x;
-      srcy += dPriv->y;
-
-      /* Clip against the source region.  This is the only source
-       * clipping we do.  XXX: Just set the texcord wrap mode to clamp
-       * or similar.
-       *
-       */
-      if (0) {
-         GLint orig_x = srcx;
-         GLint orig_y = srcy;
-
-         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
-                                   &srcx, &srcy, &width, &height))
-            goto out;
-
-         dstx += srcx - orig_x;
-         dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
-      }
-
-      /* Just use the regular cliprect mechanism...  Does this need to
-       * even hold the lock???
-       */
-      intel->vtbl.meta_draw_quad(intel,
-				 dstx,
-				 dstx + width * ctx->Pixel.ZoomX,
-				 dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
-				 dPriv->h - (dsty), 0, /* XXX: what z value? */
-				 0x00ff00ff,
-				 srcx, srcx + width, srcy, srcy + height);
-
-    out:
-      intel->vtbl.leave_meta_state(intel);
-      intel_batchbuffer_emit_mi_flush(intel->batch);
-   }
-   UNLOCK_HARDWARE(intel);
-
-   DBG("%s: success\n", __FUNCTION__);
-   return GL_TRUE;
-}
-#endif /* I915 */
-
 
 /**
  * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
@@ -272,6 +111,12 @@ do_blit_copypixels(GLcontext * ctx,
    drm_clip_rect_t *cliprects;
    int x_off, y_off;
 
+   if (type == GL_DEPTH || type == GL_STENCIL) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n");
+      return GL_FALSE;
+   }
+
    /* Update draw buffer bounds */
    _mesa_update_state(ctx);
 
@@ -362,14 +207,16 @@ do_blit_copypixels(GLcontext * ctx,
 				   &clip_x, &clip_y, &clip_w, &clip_h))
             continue;
 
-         intelEmitCopyBlit(intel, dst->cpp,
-			   src->pitch, src->buffer, 0, src->tiling,
-			   dst->pitch, dst->buffer, 0, dst->tiling,
-			   clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */
-			   clip_x, clip_y, /* dstx, dsty */
-			   clip_w, clip_h,
-			   ctx->Color.ColorLogicOpEnabled ?
-			   ctx->Color.LogicOp : GL_COPY);
+	 if (!intel_region_copy(intel,
+				dst, 0, clip_x, clip_y,
+				src, 0, clip_x + delta_x, clip_y + delta_y,
+				clip_w, clip_h,
+				ctx->Color.ColorLogicOpEnabled ?
+				ctx->Color.LogicOp : GL_COPY)) {
+	    DBG("%s: blit failure\n", __FUNCTION__);
+	    UNLOCK_HARDWARE(intel);
+	    return GL_FALSE;
+	 }
       }
    }
 out:
@@ -392,12 +239,6 @@ intelCopyPixels(GLcontext * ctx,
    if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
       return;
 
-#ifdef I915
-   if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
-      return;
-#endif
-
-   DBG("fallback to _swrast_CopyPixels\n");
-
-   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+   /* this will use swrast if needed */
+   _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index d80069dd58..7fbb89fd6a 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -29,8 +29,6 @@
 #include "main/enums.h"
 #include "main/image.h"
 #include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
 #include "main/teximage.h"
 #include "main/texenv.h"
 #include "main/texobj.h"
@@ -41,169 +39,22 @@
 #include "main/enable.h"
 #include "main/buffers.h"
 #include "main/fbobject.h"
-#include "main/renderbuffer.h"
 #include "main/depth.h"
 #include "main/hash.h"
 #include "main/blend.h"
-#include "glapi/dispatch.h"
 #include "swrast/swrast.h"
+#include "drivers/common/meta.h"
 
-#include "intel_screen.h"
 #include "intel_context.h"
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
 #include "intel_regions.h"
 #include "intel_pixel.h"
-#include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 
-static GLboolean
-intel_texture_drawpixels(GLcontext * ctx,
-			 GLint x, GLint y,
-			 GLsizei width, GLsizei height,
-			 GLenum format,
-			 GLenum type,
-			 const struct gl_pixelstore_attrib *unpack,
-			 const GLvoid *pixels)
-{
-   struct intel_context *intel = intel_context(ctx);
-   GLuint texname;
-   GLfloat vertices[4][4];
-   GLfloat texcoords[4][2];
-   GLfloat z;
-   GLint old_active_texture;
-   GLenum internalFormat;
-
-   /* We're going to mess with texturing with no regard to existing texture
-    * state, so if there is some set up we have to bail.
-    */
-   if (ctx->Texture._EnabledUnits != 0) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n");
-      return GL_FALSE;
-   }
-
-   /* Can't do textured DrawPixels with a fragment program, unless we were
-    * to generate a new program that sampled our texture and put the results
-    * in the fragment color before the user's program started.
-    */
-   if (ctx->FragmentProgram.Enabled) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n");
-      return GL_FALSE;
-   }
-
-   /* We don't have a way to generate fragments with stencil values which
-    * will set the resulting stencil value.
-    */
-   if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL)
-      return GL_FALSE;
-
-   /* Check that we can load in a texture this big. */
-   if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
-       height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n",
-		 width, height);
-      return GL_FALSE;
-   }
-
-   /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to
-    * the color buffer, and sample the texture values into the fragment depth
-    * in a program.
-    */
-   if (format == GL_DEPTH_COMPONENT) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr,
-		 "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n");
-      return GL_FALSE;
-   }
-
-   if (!ctx->Extensions.ARB_texture_non_power_of_two &&
-       (!is_power_of_two(width) || !is_power_of_two(height))) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr,
-		 "glDrawPixels() fallback: NPOT texture\n");
-      return GL_FALSE;
-   }
-
-   _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
-		    GL_CURRENT_BIT);
-   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
-
-   /* XXX: pixel store stuff */
-   _mesa_Disable(GL_POLYGON_STIPPLE);
-
-   old_active_texture = ctx->Texture.CurrentUnit;
-   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
-   _mesa_Enable(GL_TEXTURE_2D);
-   _mesa_GenTextures(1, &texname);
-   _mesa_BindTexture(GL_TEXTURE_2D, texname);
-   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-   _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
-   if (type == GL_ALPHA)
-      internalFormat = GL_ALPHA;
-   else
-      internalFormat = GL_RGBA;
-   _mesa_TexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format,
-		    type, pixels);
-
-   intel_meta_set_passthrough_transform(intel);
-
-   /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
-   z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
-
-   /* Create the vertex buffer based on the current raster pos.  The x and y
-    * we're handed are ctx->Current.RasterPos[0,1] rounded to integers.
-    * We also apply the depth.  However, the W component is already multiplied
-    * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point.
-    */
-   vertices[0][0] = x;
-   vertices[0][1] = y;
-   vertices[0][2] = z;
-   vertices[0][3] = 1.0;
-   vertices[1][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[1][1] = y;
-   vertices[1][2] = z;
-   vertices[1][3] = 1.0;
-   vertices[2][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[2][1] = y + height * ctx->Pixel.ZoomY;
-   vertices[2][2] = z;
-   vertices[2][3] = 1.0;
-   vertices[3][0] = x;
-   vertices[3][1] = y + height * ctx->Pixel.ZoomY;
-   vertices[3][2] = z;
-   vertices[3][3] = 1.0;
-
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
-
-   _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
-   _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
-   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
-   intel_meta_restore_transform(intel);
-
-   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
-   _mesa_PopClientAttrib();
-   _mesa_PopAttrib();
-
-   _mesa_DeleteTextures(1, &texname);
-
-   return GL_TRUE;
-}
 
+/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */
 static GLboolean
 intel_stencil_drawpixels(GLcontext * ctx,
 			 GLint x, GLint y,
@@ -216,14 +67,14 @@ intel_stencil_drawpixels(GLcontext * ctx,
    struct intel_context *intel = intel_context(ctx);
    GLuint texname, rb_name, fb_name, old_fb_name;
    GLfloat vertices[4][2];
-   GLfloat texcoords[4][2];
    struct intel_renderbuffer *irb;
    struct intel_renderbuffer *depth_irb;
    struct gl_renderbuffer *rb;
    struct gl_pixelstore_attrib old_unpack;
    GLstencil *stencil_pixels;
-   int row;
+   int row, y1, y2;
    GLint old_active_texture;
+   GLboolean rendering_to_fbo = ctx->DrawBuffer->Name != 0;
 
    if (format != GL_STENCIL_INDEX)
       return GL_FALSE;
@@ -358,34 +209,35 @@ intel_stencil_drawpixels(GLcontext * ctx,
    ctx->Unpack = old_unpack;
    _mesa_free(stencil_pixels);
 
-   intel_meta_set_passthrough_transform(intel);
+   meta_set_passthrough_transform(&intel->meta);
 
+   /* Since we're rendering to the framebuffer as if it was an FBO,
+    * if it's the window system we have to flip the coordinates.
+    */
+   if (rendering_to_fbo) {
+      y1 = y;
+      y2 = y + height * ctx->Pixel.ZoomY;
+   } else {
+      y1 = irb->Base.Height - (y + height * ctx->Pixel.ZoomY);
+      y2 = irb->Base.Height - y;
+   }
    vertices[0][0] = x;
-   vertices[0][1] = y;
+   vertices[0][1] = y1;
    vertices[1][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[1][1] = y;
+   vertices[1][1] = y1;
    vertices[2][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[2][1] = y + height * ctx->Pixel.ZoomY;
+   vertices[2][1] = y2;
    vertices[3][0] = x;
-   vertices[3][1] = y + height * ctx->Pixel.ZoomY;
-
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
+   vertices[3][1] = y2;
 
    _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
    _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+   meta_set_default_texrect(&intel->meta);
+
    _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
-   intel_meta_restore_transform(intel);
+   meta_restore_texcoords(&intel->meta);
+   meta_restore_transform(&intel->meta);
 
    _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
    _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name);
@@ -409,17 +261,25 @@ intelDrawPixels(GLcontext * ctx,
                 const struct gl_pixelstore_attrib *unpack,
                 const GLvoid * pixels)
 {
-   if (intel_texture_drawpixels(ctx, x, y, width, height, format, type,
-				unpack, pixels))
-      return;
-
+#if 0
+   /* XXX intel_stencil_drawpixels() doesn't seem to work reliably even
+    * when all the prerequisite conditions are met.
+    * Note that it is never hit with conform.
+    * Fall back to swrast for stencil, because even the
+    * _mesa_meta_draw_pixels() approach fails due to an apparent stencil bug.
+    */
    if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
 				unpack, pixels))
       return;
+#else
+   (void) intel_stencil_drawpixels; /* silence warning */
+   if (format == GL_STENCIL_INDEX) {
+      _swrast_DrawPixels(ctx, x, y, width, height, format, type,
+                         unpack, pixels);
+      return;
+   }
+#endif
 
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
-
-   _swrast_DrawPixels(ctx, x, y, width, height, format, type,
-		      unpack, pixels);
+   _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c
index 0370255614..8713463ace 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -180,7 +180,7 @@ do_blit_readpixels(GLcontext * ctx,
    if (!src)
       return GL_FALSE;
 
-   if (dst) {
+   if (pack->BufferObj->Name) {
       /* XXX This validation should be done by core mesa:
        */
       if (!_mesa_validate_pbo_access(2, pack, width, height, 1,
@@ -261,16 +261,19 @@ do_blit_readpixels(GLcontext * ctx,
          if (!intel_intersect_cliprects(&rect, &src_rect, &box[i]))
             continue;
 
-         intelEmitCopyBlit(intel,
-                           src->cpp,
-                           src->pitch, src->buffer, 0, src->tiling,
-                           rowLength, dst_buffer, dst_offset, GL_FALSE,
-                           rect.x1,
-                           rect.y1,
-                           rect.x1 - src_rect.x1,
-                           rect.y2 - src_rect.y2,
-                           rect.x2 - rect.x1, rect.y2 - rect.y1,
-			   GL_COPY);
+         if (!intelEmitCopyBlit(intel,
+				src->cpp,
+				src->pitch, src->buffer, 0, src->tiling,
+				rowLength, dst_buffer, dst_offset, GL_FALSE,
+				rect.x1,
+				rect.y1,
+				rect.x1 - src_rect.x1,
+				rect.y2 - src_rect.y2,
+				rect.x2 - rect.x1, rect.y2 - rect.y1,
+				GL_COPY)) {
+	    UNLOCK_HARDWARE(intel);
+	    return GL_FALSE;
+	 }
       }
    }
    UNLOCK_HARDWARE(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 57ac8f0cc1..d19f1bae34 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -189,6 +189,19 @@
 
 #define S7_DEPTH_OFFSET_CONST_MASK     ~0
 
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK	(0x3<<24)
+#define BUF_3D_ID_DEPTH 	(0x7<<24)
+#define BUF_3D_USE_FENCE	(1<<23)
+#define BUF_3D_TILED_SURFACE	(1<<22)
+#define BUF_3D_TILE_WALK_X	0
+#define BUF_3D_TILE_WALK_Y	(1<<21)
+#define BUF_3D_PITCH(x)         (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x)		((x) & ~0x3)
+
 /* Primitive dispatch on 830-945 */
 #define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
 #define PRIM_INDIRECT            (1<<23)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index b8d2dec4cb..a86c66a844 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -52,17 +52,77 @@
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
 
+/* This should be set to the maximum backtrace size desired.
+ * Set it to 0 to disable backtrace debugging.
+ */
+#define DEBUG_BACKTRACE_SIZE 0
+
+#if DEBUG_BACKTRACE_SIZE == 0
+/* Use the standard debug output */
+#define _DBG(...) DBG(__VA_ARGS__)
+#else
+/* Backtracing debug output; do/while(0) keeps "_DBG(...);" one statement */
+#define _DBG(...) do { debug_backtrace(); DBG(__VA_ARGS__); } while (0)
+
+/* Backtracing debug support */
+#include <execinfo.h>
+/* Emit the current call stack through DBG, one "symbol:" entry per frame. */
+static void
+debug_backtrace(void)
+{
+   void *trace[DEBUG_BACKTRACE_SIZE];
+   char **strings = NULL;
+   int traceSize;
+   int i;
+
+   traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
+   strings = backtrace_symbols(trace, traceSize);
+   if (strings == NULL) {
+      DBG("no backtrace:");
+      return;
+   }
+
+   /* Spit out all the strings with a colon separator.  Ignore
+    * the first, since we don't really care about the call
+    * to debug_backtrace() itself.  Skip until the final "/" in
+    * the trace to avoid really long lines.
+    */
+   for (i = 1; i < traceSize; i++) {
+      char *p = strings[i], *slash = strings[i];
+      while (*p) {
+         if (*p++ == '/') {
+            slash = p;
+         }
+      }
+
+      DBG("%s:", slash);
+   }
+
+   /* backtrace_symbols() returns one malloc'd block; a single free suffices */
+   free(strings);
+}
+
+#endif
+
+
+
 /* XXX: Thread safety?
  */
 GLubyte *
 intel_region_map(struct intel_context *intel, struct intel_region *region)
 {
-   DBG("%s\n", __FUNCTION__);
+   intelFlush(&intel->ctx);
+
+   _DBG("%s %p\n", __FUNCTION__, region);
    if (!region->map_refcount++) {
       if (region->pbo)
          intel_region_cow(intel, region);
 
-      dri_bo_map(region->buffer, GL_TRUE);
+      if (region->tiling != I915_TILING_NONE &&
+	  intel->intelScreen->kernel_exec_fencing)
+	 drm_intel_gem_bo_map_gtt(region->buffer);
+      else
+	 dri_bo_map(region->buffer, GL_TRUE);
       region->map = region->buffer->virtual;
    }
 
@@ -72,9 +132,13 @@ intel_region_map(struct intel_context *intel, struct intel_region *region)
 void
 intel_region_unmap(struct intel_context *intel, struct intel_region *region)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s %p\n", __FUNCTION__, region);
    if (!--region->map_refcount) {
-      dri_bo_unmap(region->buffer);
+      if (region->tiling != I915_TILING_NONE &&
+	  intel->intelScreen->kernel_exec_fencing)
+	 drm_intel_gem_bo_unmap_gtt(region->buffer);
+      else
+	 dri_bo_unmap(region->buffer);
       region->map = NULL;
    }
 }
@@ -87,10 +151,10 @@ intel_region_alloc_internal(struct intel_context *intel,
 {
    struct intel_region *region;
 
-   DBG("%s\n", __FUNCTION__);
-
-   if (buffer == NULL)
+   if (buffer == NULL) {
+      _DBG("%s <-- NULL\n", __FUNCTION__);
       return NULL;
+   }
 
    region = calloc(sizeof(*region), 1);
    region->cpp = cpp;
@@ -104,15 +168,33 @@ intel_region_alloc_internal(struct intel_context *intel,
    region->tiling = I915_TILING_NONE;
    region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
 
+   _DBG("%s <-- %p\n", __FUNCTION__, region);
    return region;
 }
 
 struct intel_region *
 intel_region_alloc(struct intel_context *intel,
+		   uint32_t tiling,
                    GLuint cpp, GLuint width, GLuint height, GLuint pitch,
 		   GLboolean expect_accelerated_upload)
 {
    dri_bo *buffer;
+   struct intel_region *region;
+
+   /* If we're tiled, our allocations are in 8 or 32-row blocks, so
+    * failure to align our height means that we won't allocate enough pages.
+    *
+    * If we're untiled, we still have to align to 2 rows high because the
+    * data port accesses 2x2 blocks even if the bottom row isn't to be
+    * rendered, so failure to align means we could walk off the end of the
+    * GTT and fault.
+    */
+   if (tiling == I915_TILING_X)
+      height = ALIGN(height, 8);
+   else if (tiling == I915_TILING_Y)
+      height = ALIGN(height, 32);
+   else
+      height = ALIGN(height, 2);
 
    /* If we're untiled, we have to align to 2 rows high because the
     * data port accesses 2x2 blocks even if the bottom row isn't to be
@@ -129,7 +211,16 @@ intel_region_alloc(struct intel_context *intel,
 				  pitch * cpp * height, 64);
    }
 
-   return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer);
+   region = intel_region_alloc_internal(intel, cpp, width, height,
+					pitch, buffer);
+   /* region is NULL when the buffer allocation failed; skip tiling setup. */
+   if (region != NULL && tiling != I915_TILING_NONE) {
+      assert(((pitch * cpp) & 127) == 0);
+      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
+      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+   }
+
+   return region;
 }
 
 struct intel_region *
@@ -165,7 +256,7 @@ void
 intel_region_reference(struct intel_region **dst, struct intel_region *src)
 {
    if (src)
-      DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
+      _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
 
    assert(*dst == NULL);
    if (src) {
@@ -179,10 +270,12 @@ intel_region_release(struct intel_region **region_handle)
 {
    struct intel_region *region = *region_handle;
 
-   if (region == NULL)
+   if (region == NULL) {
+      _DBG("%s NULL\n", __FUNCTION__);
       return;
+   }
 
-   DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
+   _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
 
    ASSERT(region->refcount > 0);
    region->refcount--;
@@ -256,9 +349,7 @@ intel_region_data(struct intel_context *intel,
                   const void *src, GLuint src_pitch,
                   GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
-   GLboolean locked = GL_FALSE;
-
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
       return;
@@ -271,39 +362,33 @@ intel_region_data(struct intel_context *intel,
          intel_region_cow(intel, dst);
    }
 
-   if (!intel->locked) {
-      LOCK_HARDWARE(intel);
-      locked = GL_TRUE;
-   }
-
+   LOCK_HARDWARE(intel);
    _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
                    dst->cpp,
                    dst->pitch,
                    dstx, dsty, width, height, src, src_pitch, srcx, srcy);
 
    intel_region_unmap(intel, dst);
-
-   if (locked)
-      UNLOCK_HARDWARE(intel);
-
+   UNLOCK_HARDWARE(intel);
 }
 
 /* Copy rectangular sub-regions. Need better logic about when to
  * push buffers into AGP - will currently do so whenever possible.
  */
-void
+GLboolean
 intel_region_copy(struct intel_context *intel,
                   struct intel_region *dst,
                   GLuint dst_offset,
                   GLuint dstx, GLuint dsty,
                   struct intel_region *src,
                   GLuint src_offset,
-                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
+                  GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLenum logicop)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
-      return;
+      return GL_FALSE;
 
    if (dst->pbo) {
       if (dstx == 0 &&
@@ -315,41 +400,12 @@ intel_region_copy(struct intel_context *intel,
 
    assert(src->cpp == dst->cpp);
 
-   intelEmitCopyBlit(intel,
-                     dst->cpp,
-                     src->pitch, src->buffer, src_offset, src->tiling,
-                     dst->pitch, dst->buffer, dst_offset, dst->tiling,
-                     srcx, srcy, dstx, dsty, width, height,
-		     GL_COPY);
-}
-
-/* Fill a rectangular sub-region.  Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void
-intel_region_fill(struct intel_context *intel,
-                  struct intel_region *dst,
-                  GLuint dst_offset,
-                  GLuint dstx, GLuint dsty,
-                  GLuint width, GLuint height, GLuint color)
-{
-   DBG("%s\n", __FUNCTION__);
-
-   if (intel == NULL)
-      return;   
-
-   if (dst->pbo) {
-      if (dstx == 0 &&
-          dsty == 0 && width == dst->pitch && height == dst->height)
-         intel_region_release_pbo(intel, dst);
-      else
-         intel_region_cow(intel, dst);
-   }
-
-   intelEmitFillBlit(intel,
-                     dst->cpp,
-                     dst->pitch, dst->buffer, dst_offset, dst->tiling,
-                     dstx, dsty, width, height, color);
+   return intelEmitCopyBlit(intel,
+			    dst->cpp,
+			    src->pitch, src->buffer, src_offset, src->tiling,
+			    dst->pitch, dst->buffer, dst_offset, dst->tiling,
+			    srcx, srcy, dstx, dsty, width, height,
+			    logicop);
 }
 
 /* Attach to a pbo, discarding our data.  Effectively zero-copy upload
@@ -360,9 +416,13 @@ intel_region_attach_pbo(struct intel_context *intel,
                         struct intel_region *region,
                         struct intel_buffer_object *pbo)
 {
+   dri_bo *buffer;
+
    if (region->pbo == pbo)
       return;
 
+   _DBG("%s %p %p\n", __FUNCTION__, region, pbo);
+
    /* If there is already a pbo attached, break the cow tie now.
     * Don't call intel_region_release_pbo() as that would
     * unnecessarily allocate a new buffer we would have to immediately
@@ -378,10 +438,13 @@ intel_region_attach_pbo(struct intel_context *intel,
       region->buffer = NULL;
    }
 
+   /* make sure pbo has a buffer of its own */
+   buffer = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL);
+
    region->pbo = pbo;
    region->pbo->region = region;
-   dri_bo_reference(pbo->buffer);
-   region->buffer = pbo->buffer;
+   dri_bo_reference(buffer);
+   region->buffer = buffer;
 }
 
 
@@ -392,6 +455,7 @@ void
 intel_region_release_pbo(struct intel_context *intel,
                          struct intel_region *region)
 {
+   _DBG("%s %p\n", __FUNCTION__, region);
    assert(region->buffer == region->pbo->buffer);
    region->pbo->region = NULL;
    region->pbo = NULL;
@@ -410,34 +474,27 @@ void
 intel_region_cow(struct intel_context *intel, struct intel_region *region)
 {
    struct intel_buffer_object *pbo = region->pbo;
-   GLboolean was_locked = intel->locked;
-
-   if (intel == NULL)
-      return;
+   GLboolean ok;
 
    intel_region_release_pbo(intel, region);
 
    assert(region->cpp * region->pitch * region->height == pbo->Base.Size);
 
-   DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size);
+   _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size);
 
    /* Now blit from the texture buffer to the new buffer: 
     */
 
-   was_locked = intel->locked;
-   if (!was_locked)
-      LOCK_HARDWARE(intel);
-
-   intelEmitCopyBlit(intel,
-		     region->cpp,
-		     region->pitch, region->buffer, 0, region->tiling,
-		     region->pitch, pbo->buffer, 0, region->tiling,
-		     0, 0, 0, 0,
-		     region->pitch, region->height,
-		     GL_COPY);
-
-   if (!was_locked)
-      UNLOCK_HARDWARE(intel);
+   LOCK_HARDWARE(intel);
+   ok = intelEmitCopyBlit(intel,
+                          region->cpp,
+                          region->pitch, pbo->buffer, 0, region->tiling,
+                          region->pitch, region->buffer, 0, region->tiling,
+                          0, 0, 0, 0,
+                          region->pitch, region->height,
+                          GL_COPY);
+   assert(ok);
+   UNLOCK_HARDWARE(intel);
 }
 
 dri_bo *
@@ -466,6 +523,10 @@ intel_recreate_static(struct intel_context *intel,
    if (region == NULL) {
       region = calloc(sizeof(*region), 1);
       region->refcount = 1;
+      _DBG("%s creating new region %p\n", __FUNCTION__, region);
+   }
+   else {
+      _DBG("%s %p\n", __FUNCTION__, region);
    }
 
    if (intel->ctx.Visual.rgbBits == 24)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
index 45e2bf4e77..0d379bdc6e 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -73,7 +73,8 @@ struct intel_region
  * copied by calling intel_reference_region().
  */
 struct intel_region *intel_region_alloc(struct intel_context *intel,
-                                        GLuint cpp, GLuint width,
+                                        uint32_t tiling,
+					GLuint cpp, GLuint width,
                                         GLuint height, GLuint pitch,
 					GLboolean expect_accelerated_upload);
 
@@ -109,21 +110,15 @@ void intel_region_data(struct intel_context *intel,
 
 /* Copy rectangular sub-regions
  */
-void intel_region_copy(struct intel_context *intel,
-                       struct intel_region *dest,
-                       GLuint dest_offset,
-                       GLuint destx, GLuint desty,
-                       struct intel_region *src,
-                       GLuint src_offset,
-                       GLuint srcx, GLuint srcy, GLuint width, GLuint height);
-
-/* Fill a rectangular sub-region
- */
-void intel_region_fill(struct intel_context *intel,
-                       struct intel_region *dest,
-                       GLuint dest_offset,
-                       GLuint destx, GLuint desty,
-                       GLuint width, GLuint height, GLuint color);
+GLboolean
+intel_region_copy(struct intel_context *intel,
+		  struct intel_region *dest,
+		  GLuint dest_offset,
+		  GLuint destx, GLuint desty,
+		  struct intel_region *src,
+		  GLuint src_offset,
+		  GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLenum logicop);
 
 /* Helpers for zerocopy uploads, particularly texture image uploads:
  */
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index f810850ef5..1b8c56e68d 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -49,6 +49,10 @@
 #include "i915_drm.h"
 #include "i830_dri.h"
 
+#define DRI_CONF_TEXTURE_TILING(def) \
+	DRI_CONF_OPT_BEGIN(texture_tiling, bool, def)		\
+		DRI_CONF_DESC(en, "Enable texture tiling")	\
+	DRI_CONF_OPT_END					\
 
 PUBLIC const char __driConfigOptions[] =
    DRI_CONF_BEGIN
@@ -64,6 +68,17 @@ PUBLIC const char __driConfigOptions[] =
 	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
 	 DRI_CONF_DESC_END
       DRI_CONF_OPT_END
+
+#ifdef I915
+     DRI_CONF_TEXTURE_TILING(false)
+#else
+     DRI_CONF_TEXTURE_TILING(true)
+#endif
+
+      DRI_CONF_OPT_BEGIN(early_z, bool, false)
+	 DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
+      DRI_CONF_OPT_END
+
    DRI_CONF_SECTION_END
    DRI_CONF_SECTION_QUALITY
       DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -76,7 +91,7 @@ PUBLIC const char __driConfigOptions[] =
    DRI_CONF_SECTION_END
 DRI_CONF_END;
 
-const GLuint __driNConfigOptions = 8;
+const GLuint __driNConfigOptions = 10;
 
 #ifdef USE_NEW_INTERFACE
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
@@ -587,7 +602,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
    GLboolean gem_supported;
    struct drm_i915_getparam gp;
    __DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
-   int num_fences;
+   int num_fences = 0;
 
    intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 34b78ebc1a..8df4990880 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -501,7 +501,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
 
 
 /**
- * Prepare for softare rendering.  Map current read/draw framebuffers'
+ * Prepare for software rendering.  Map current read/draw framebuffers'
  * renderbuffes and all currently bound texture objects.
  *
  * Old note: Moved locking out to get reasonable span performance.
@@ -526,7 +526,7 @@ intelSpanRenderStart(GLcontext * ctx)
 }
 
 /**
- * Called when done softare rendering.  Unmap the buffers we mapped in
+ * Called when done software rendering.  Unmap the buffers we mapped in
  * the above function.
  */
 void
diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c
new file mode 100644
index 0000000000..1286fe929b
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_syncobj.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_syncobj.c
+ *
+ * Support for ARB_sync
+ *
+ * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * reference on it.  We can then check for completion or wait for completion
+ * using the normal buffer object mechanisms.  This does mean that if an
+ * application is using many sync objects, it will emit small batchbuffers
+ * which may end up being a significant overhead.  In other tests of removing
+ * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
+ * performance bottleneck, though.
+ */
+
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/**
+ * Driver hook: allocate a zero-initialized sync object.
+ *
+ * Returns the embedded gl_sync_object, or NULL if the allocation failed.
+ * The id argument is not used.
+ */
+static struct gl_sync_object *
+intel_new_sync_object(GLcontext *ctx, GLuint id)
+{
+   struct intel_sync_object *sync;
+
+   sync = _mesa_calloc(sizeof(struct intel_sync_object));
+   if (sync == NULL)
+      return NULL;
+
+   return &sync->Base;
+}
+
+/**
+ * Driver hook: destroy a sync object.
+ *
+ * Unreferences the batch buffer recorded by intel_fence_sync() and frees the
+ * object.  sync->bo is NULL when no fence was inserted or the sync has already
+ * signaled; this relies on drm_intel_bo_unreference() tolerating NULL
+ * (TODO: confirm against libdrm).
+ */
+static void
+intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   drm_intel_bo_unreference(sync->bo);
+   _mesa_free(sync);
+}
+
+/**
+ * Driver hook: insert a fence into the command stream.
+ *
+ * Emits a flush into the current batch, takes a reference on that batch's
+ * buffer so its completion can be polled or waited on later, and then flushes
+ * the batch.  Only GL_SYNC_GPU_COMMANDS_COMPLETE is accepted; flags is unused.
+ */
+static void
+intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
+	       GLenum condition, GLbitfield flags)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   /* Grab the buffer before flushing: presumably intelFlush() swaps in a new
+    * batch buffer, after which intel->batch->buf is no longer the fenced one.
+    */
+   sync->bo = intel->batch->buf;
+   drm_intel_bo_reference(sync->bo);
+
+   intelFlush(ctx);
+}
+
+/* We ignore the user-supplied timeout.  This is weaselly -- we're allowed to
+ * round to an implementation-dependent accuracy, and right now our
+ * implementation "rounds" to the wait-forever value.
+ *
+ * The fix would be a new kernel function to do the GTT transition with a
+ * timeout.
+ */
+static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+				 GLbitfield flags, GLuint64 timeout)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   /* A NULL bo means there is nothing left to wait for. */
+   if (sync->bo) {
+      drm_intel_bo_wait_rendering(sync->bo);
+      s->StatusFlag = 1;
+      drm_intel_bo_unreference(sync->bo);
+      sync->bo = NULL;
+   }
+}
+
+/* We have nothing to do for WaitSync.  Our GL command stream is sequential,
+ * so given that the sync object has already flushed the batchbuffer,
+ * any batchbuffers coming after this waitsync will naturally not occur until
+ * the previous one is done.
+ */
+static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+				 GLbitfield flags, GLuint64 timeout)
+{
+}
+
+/**
+ * Driver hook: non-blocking poll of a sync object.
+ *
+ * Once the fenced batch buffer is no longer busy, drop our reference to it
+ * and set StatusFlag to mark the sync object signaled.  While it is still
+ * busy (or when bo is already NULL) nothing changes.
+ */
+static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
+      drm_intel_bo_unreference(sync->bo);
+      sync->bo = NULL;
+      s->StatusFlag = 1;
+   }
+}
+
+/**
+ * Install the ARB_sync driver hooks defined in this file into the table.
+ */
+void intel_init_syncobj_functions(struct dd_function_table *functions)
+{
+   functions->NewSyncObject = intel_new_sync_object;
+   functions->DeleteSyncObject = intel_delete_sync_object;
+   functions->FenceSync = intel_fence_sync;
+   functions->CheckSync = intel_check_sync;
+   functions->ClientWaitSync = intel_client_wait_sync;
+   functions->ServerWaitSync = intel_server_wait_sync;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index ae0994b183..df63f29a42 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -158,81 +158,11 @@ timed_memcpy(void *dest, const void *src, size_t n)
 }
 #endif /* DO_DEBUG */
 
-/**
- * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
- * level).
- *
- * The texture object's miptree must be mapped.
- *
- * It would be really nice if this was just called by Mesa whenever mipmaps
- * needed to be regenerated, rather than us having to remember to do so in
- * each texture image modification path.
- *
- * This function should also include an accelerated path.
- */
-void
-intel_generate_mipmap(GLcontext *ctx, GLenum target,
-                      struct gl_texture_object *texObj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   int face, i;
-
-   _mesa_generate_mipmap(ctx, target, texObj);
-
-   /* Update the level information in our private data in the new images, since
-    * it didn't get set as part of a normal TexImage path.
-    */
-   for (face = 0; face < nr_faces; face++) {
-      for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
-         struct intel_texture_image *intelImage;
-
-	 intelImage = intel_texture_image(texObj->Image[face][i]);
-	 if (intelImage == NULL)
-	    break;
-
-	 intelImage->level = i;
-	 intelImage->face = face;
-	 /* Unreference the miptree to signal that the new Data is a bare
-	  * pointer from mesa.
-	  */
-	 intel_miptree_release(intel, &intelImage->mt);
-      }
-   }
-}
-
-static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
-   intel_generate_mipmap(ctx, target, texObj);
-   intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
-}
-
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
 {
    functions->ChooseTextureFormat = intelChooseTextureFormat;
-   functions->TexImage1D = intelTexImage1D;
-   functions->TexImage2D = intelTexImage2D;
-   functions->TexImage3D = intelTexImage3D;
-   functions->TexSubImage1D = intelTexSubImage1D;
-   functions->TexSubImage2D = intelTexSubImage2D;
-   functions->TexSubImage3D = intelTexSubImage3D;
-   functions->CopyTexImage1D = intelCopyTexImage1D;
-   functions->CopyTexImage2D = intelCopyTexImage2D;
-   functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
-   functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
-   functions->GetTexImage = intelGetTexImage;
-   functions->GenerateMipmap = intelGenerateMipmap;
-
-   /* compressed texture functions */
-   functions->CompressedTexImage2D = intelCompressedTexImage2D;
-   functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
-   functions->GetCompressedTexImage = intelGetCompressedTexImage;
+   functions->GenerateMipmap = intel_generate_mipmap;
 
    functions->NewTextureObject = intelNewTextureObject;
    functions->NewTextureImage = intelNewTextureImage;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index f5372d82fb..471aa2a240 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -35,116 +35,17 @@
 
 void intelInitTextureFuncs(struct dd_function_table *functions);
 
+void intelInitTextureImageFuncs(struct dd_function_table *functions);
+
+void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
+
+void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
+
 const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx,
                                                          GLint internalFormat,
                                                          GLenum format,
                                                          GLenum type);
 
-
-void intelTexImage3D(GLcontext * ctx,
-                     GLenum target, GLint level,
-                     GLint internalFormat,
-                     GLint width, GLint height, GLint depth,
-                     GLint border,
-                     GLenum format, GLenum type, const void *pixels,
-                     const struct gl_pixelstore_attrib *packing,
-                     struct gl_texture_object *texObj,
-                     struct gl_texture_image *texImage);
-
-void intelTexSubImage3D(GLcontext * ctx,
-                        GLenum target,
-                        GLint level,
-                        GLint xoffset, GLint yoffset, GLint zoffset,
-                        GLsizei width, GLsizei height, GLsizei depth,
-                        GLenum format, GLenum type,
-                        const GLvoid * pixels,
-                        const struct gl_pixelstore_attrib *packing,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage);
-
-void intelTexImage2D(GLcontext * ctx,
-                     GLenum target, GLint level,
-                     GLint internalFormat,
-                     GLint width, GLint height, GLint border,
-                     GLenum format, GLenum type, const void *pixels,
-                     const struct gl_pixelstore_attrib *packing,
-                     struct gl_texture_object *texObj,
-                     struct gl_texture_image *texImage);
-
-void intelTexSubImage2D(GLcontext * ctx,
-                        GLenum target,
-                        GLint level,
-                        GLint xoffset, GLint yoffset,
-                        GLsizei width, GLsizei height,
-                        GLenum format, GLenum type,
-                        const GLvoid * pixels,
-                        const struct gl_pixelstore_attrib *packing,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage);
-
-void intelTexImage1D(GLcontext * ctx,
-                     GLenum target, GLint level,
-                     GLint internalFormat,
-                     GLint width, GLint border,
-                     GLenum format, GLenum type, const void *pixels,
-                     const struct gl_pixelstore_attrib *packing,
-                     struct gl_texture_object *texObj,
-                     struct gl_texture_image *texImage);
-
-void intelTexSubImage1D(GLcontext * ctx,
-                        GLenum target,
-                        GLint level,
-                        GLint xoffset,
-                        GLsizei width,
-                        GLenum format, GLenum type,
-                        const GLvoid * pixels,
-                        const struct gl_pixelstore_attrib *packing,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage);
-
-void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
-                         GLenum internalFormat,
-                         GLint x, GLint y, GLsizei width, GLint border);
-
-void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
-                         GLenum internalFormat,
-                         GLint x, GLint y, GLsizei width, GLsizei height,
-                         GLint border);
-
-void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
-                            GLint xoffset, GLint x, GLint y, GLsizei width);
-
-void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
-                            GLint xoffset, GLint yoffset,
-                            GLint x, GLint y, GLsizei width, GLsizei height);
-
-void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
-                      GLenum format, GLenum type, GLvoid * pixels,
-                      struct gl_texture_object *texObj,
-                      struct gl_texture_image *texImage);
-
-void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-				GLint internalFormat,
-				GLint width, GLint height, GLint border,
-				GLsizei imageSize, const GLvoid *data,
-				struct gl_texture_object *texObj,
-				struct gl_texture_image *texImage );
-
-void intelCompressedTexSubImage2D(GLcontext * ctx,
-				  GLenum target,
-				  GLint level,
-				  GLint xoffset, GLint yoffset,
-				  GLsizei width, GLsizei height,
-				  GLenum format, GLsizei imageSize,
-				  const GLvoid * pixels,
-				  struct gl_texture_object *texObj,
-				  struct gl_texture_image *texImage);
-
-void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
-				GLvoid *pixels,
-				struct gl_texture_object *texObj,
-				struct gl_texture_image *texImage);
-
 void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
 		       unsigned long long offset, GLint depth, GLuint pitch);
 void intelSetTexBuffer(__DRIcontext *pDRICtx,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 90bbb8c6bb..028b49c14d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -73,11 +73,8 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat)
       return NULL;
    case GL_RGBA:
    case GL_RGBA8:
-      return intel_readbuf_region(intel);
    case GL_RGB:
-      if (intel->ctx.Visual.rgbBits == 16)
-         return intel_readbuf_region(intel);
-      return NULL;
+      return intel_readbuf_region(intel);
    default:
       return NULL;
    }
@@ -99,14 +96,24 @@ do_copy_texsubimage(struct intel_context *intel,
 
    if (!intelImage->mt || !src) {
       if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "%s fail %p %p\n",
-		 __FUNCTION__, intelImage->mt, src);
+	 fprintf(stderr, "%s fail %p %p (0x%08x)\n",
+		 __FUNCTION__, intelImage->mt, src, internalFormat);
+      return GL_FALSE;
+   }
+
+   if (intelImage->mt->cpp != src->cpp) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "%s fail %d vs %d cpp\n",
+		 __FUNCTION__, intelImage->mt->cpp, src->cpp);
       return GL_FALSE;
    }
 
    intelFlush(ctx);
    LOCK_HARDWARE(intel);
    {
+      drm_intel_bo *dst_bo = intel_region_buffer(intel,
+						 intelImage->mt->region,
+						 INTEL_WRITE_PART);
       GLuint image_offset = intel_miptree_image_offset(intelImage->mt,
                                                        intelImage->face,
                                                        intelImage->level);
@@ -118,8 +125,12 @@ do_copy_texsubimage(struct intel_context *intel,
       dstx += x - orig_x;
       dsty += y - orig_y;
 
-      /* image_offset may be non-page-aligned, but that's illegal for tiling. */
-      assert(intelImage->mt->region->tiling == I915_TILING_NONE);
+      /* Can't blit to tiled buffers with non-tile-aligned offset. */
+      if (intelImage->mt->region->tiling != I915_TILING_NONE &&
+	  (image_offset & 4095) != 0) {
+	 UNLOCK_HARDWARE(intel);
+	 return GL_FALSE;
+      }
 
       if (ctx->ReadBuffer->Name == 0) {
 	 /* reading from a window, adjust x, y */
@@ -140,35 +151,35 @@ do_copy_texsubimage(struct intel_context *intel,
 	 src_pitch = src->pitch;
       }
 
-      intelEmitCopyBlit(intel,
-			intelImage->mt->cpp,
-			src_pitch,
-			src->buffer,
-			0,
-			src->tiling,
-			intelImage->mt->pitch,
-			intelImage->mt->region->buffer,
-			image_offset,
-			intelImage->mt->region->tiling,
-			x, y, dstx, dsty, width, height,
-			GL_COPY);
+      if (!intelEmitCopyBlit(intel,
+			     intelImage->mt->cpp,
+			     src_pitch,
+			     src->buffer,
+			     0,
+			     src->tiling,
+			     intelImage->mt->pitch,
+			     dst_bo,
+			     image_offset,
+			     intelImage->mt->region->tiling,
+			     x, y, dstx, dsty, width, height,
+			     GL_COPY)) {
+	 UNLOCK_HARDWARE(intel);
+	 return GL_FALSE;
+      }
    }
 
    UNLOCK_HARDWARE(intel);
 
    /* GL_SGIS_generate_mipmap */
    if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      ctx->Driver.GenerateMipmap(ctx, target, texObj);
+      intel_generate_mipmap(ctx, target, texObj);
    }
 
    return GL_TRUE;
 }
 
 
-
-
-
-void
+static void
 intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
                     GLenum internalFormat,
                     GLint x, GLint y, GLsizei width, GLint border)
@@ -214,7 +225,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
                            width, border);
 }
 
-void
+
+static void
 intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
                     GLenum internalFormat,
                     GLint x, GLint y, GLsizei width, GLsizei height,
@@ -262,7 +274,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
 }
 
 
-void
+static void
 intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
                        GLint xoffset, GLint x, GLint y, GLsizei width)
 {
@@ -287,8 +299,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
 }
 
 
-
-void
+static void
 intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
                        GLint xoffset, GLint yoffset,
                        GLint x, GLint y, GLsizei width, GLsizei height)
@@ -301,7 +312,6 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
       _mesa_select_tex_image(ctx, texObj, target, level);
    GLenum internalFormat = texImage->InternalFormat;
 
-
    /* Need to check texture is compatible with source format. 
     */
 
@@ -316,3 +326,13 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
                                  xoffset, yoffset, x, y, width, height);
    }
 }
+
+
+void
+intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
+{
+   functions->CopyTexImage1D = intelCopyTexImage1D;
+   functions->CopyTexImage2D = intelCopyTexImage2D;
+   functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
+   functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 5e61e9e95e..c5f5220837 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -131,6 +131,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
       comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
    intelObj->mt = intel_miptree_create(intel,
                                        intelObj->base.Target,
+                                       intelImage->base._BaseFormat,
                                        intelImage->base.InternalFormat,
                                        firstLevel,
                                        lastLevel,
@@ -205,7 +206,7 @@ try_pbo_upload(struct intel_context *intel,
    GLuint src_offset, src_stride;
    GLuint dst_offset, dst_stride;
 
-   if (!pbo ||
+   if (unpack->BufferObj->Name == 0 ||
        intel->ctx._ImageTransferState ||
        unpack->SkipPixels || unpack->SkipRows) {
       DBG("%s: failure 1\n", __FUNCTION__);
@@ -235,12 +236,15 @@ try_pbo_upload(struct intel_context *intel,
 					       INTEL_WRITE_FULL);
 
 
-      intelEmitCopyBlit(intel,
-                        intelImage->mt->cpp,
-                        src_stride, src_buffer, src_offset, GL_FALSE,
-                        dst_stride, dst_buffer, dst_offset, GL_FALSE,
-                        0, 0, 0, 0, width, height,
-			GL_COPY);
+      if (!intelEmitCopyBlit(intel,
+			     intelImage->mt->cpp,
+			     src_stride, src_buffer, src_offset, GL_FALSE,
+			     dst_stride, dst_buffer, dst_offset, GL_FALSE,
+			     0, 0, 0, 0, width, height,
+			     GL_COPY)) {
+	 UNLOCK_HARDWARE(intel);
+	 return GL_FALSE;
+      }
    }
    UNLOCK_HARDWARE(intel);
 
@@ -248,7 +252,6 @@ try_pbo_upload(struct intel_context *intel,
 }
 
 
-
 static GLboolean
 try_pbo_zcopy(struct intel_context *intel,
               struct intel_texture_image *intelImage,
@@ -261,7 +264,7 @@ try_pbo_zcopy(struct intel_context *intel,
    GLuint src_offset, src_stride;
    GLuint dst_offset, dst_stride;
 
-   if (!pbo ||
+   if (unpack->BufferObj->Name == 0 ||
        intel->ctx._ImageTransferState ||
        unpack->SkipPixels || unpack->SkipRows) {
       DBG("%s: failure 1\n", __FUNCTION__);
@@ -293,10 +296,6 @@ try_pbo_zcopy(struct intel_context *intel,
 }
 
 
-
-
-
-
 static void
 intelTexImage(GLcontext * ctx,
               GLint dims,
@@ -307,7 +306,8 @@ intelTexImage(GLcontext * ctx,
               GLenum format, GLenum type, const void *pixels,
               const struct gl_pixelstore_attrib *unpack,
               struct gl_texture_object *texObj,
-              struct gl_texture_image *texImage, GLsizei imageSize, int compressed)
+              struct gl_texture_image *texImage, GLsizei imageSize,
+              GLboolean compressed)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_texture_object *intelObj = intel_texture_object(texObj);
@@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx,
    GLint postConvHeight = height;
    GLint texelBytes, sizeInBytes;
    GLuint dstRowStride = 0, srcRowStride = texImage->RowStride;
-   GLboolean needs_map;
 
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
@@ -414,7 +413,9 @@ intelTexImage(GLcontext * ctx,
        * a miptree, so create one just for our level and store it in the image.
        * It'll get moved into the object miptree at validate time.
        */
-      intelImage->mt = intel_miptree_create(intel, target, internalFormat,
+      intelImage->mt = intel_miptree_create(intel, target,
+					    intelImage->base.TexFormat->BaseFormat,
+					    internalFormat,
 					    level, level,
 					    width, height, depth,
 					    intelImage->base.TexFormat->TexelBytes,
@@ -426,7 +427,7 @@ intelTexImage(GLcontext * ctx,
     */
    if (dims <= 2 &&
        intelImage->mt &&
-       intel_buffer_object(unpack->BufferObj) &&
+       unpack->BufferObj->Name != 0 &&
        check_pbo_format(internalFormat, format,
                         type, intelImage->base.TexFormat)) {
 
@@ -464,8 +465,6 @@ intelTexImage(GLcontext * ctx,
       DBG("pbo upload failed\n");
    }
 
-
-
    /* intelCopyTexImage calls this function with pixels == NULL, with
     * the expectation that the mipmap tree will be set up but nothing
     * more will be done.  This is where those calls return:
@@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx,
 
    LOCK_HARDWARE(intel);
 
-   /* Two cases where we need a mapping of the miptree: when the user supplied
-    * data is mapped as well (non-PBO, memcpy upload) or when we're going to do
-    * (software) mipmap generation.
-    */
-   needs_map = (pixels != NULL) || (level == texObj->BaseLevel &&
-				  texObj->GenerateMipmap);
-
    if (intelImage->mt) {
-      if (needs_map)
+      if (pixels != NULL)
          texImage->Data = intel_miptree_image_map(intel,
                                                   intelImage->mt,
                                                   intelImage->face,
@@ -547,25 +539,26 @@ intelTexImage(GLcontext * ctx,
 						   format, type, pixels, unpack)) {
 	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
        }
-
-       /* GL_SGIS_generate_mipmap */
-       if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-	  intel_generate_mipmap(ctx, target, texObj);
-       }
    }
 
    _mesa_unmap_teximage_pbo(ctx, unpack);
 
    if (intelImage->mt) {
-      if (needs_map)
+      if (pixels != NULL)
          intel_miptree_image_unmap(intel, intelImage->mt);
       texImage->Data = NULL;
    }
 
    UNLOCK_HARDWARE(intel);
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
 }
 
-void
+
+static void
 intelTexImage3D(GLcontext * ctx,
                 GLenum target, GLint level,
                 GLint internalFormat,
@@ -578,11 +571,11 @@ intelTexImage3D(GLcontext * ctx,
 {
    intelTexImage(ctx, 3, target, level,
                  internalFormat, width, height, depth, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+                 format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
 }
 
 
-void
+static void
 intelTexImage2D(GLcontext * ctx,
                 GLenum target, GLint level,
                 GLint internalFormat,
@@ -594,10 +587,11 @@ intelTexImage2D(GLcontext * ctx,
 {
    intelTexImage(ctx, 2, target, level,
                  internalFormat, width, height, 1, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+                 format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
 }
 
-void
+
+static void
 intelTexImage1D(GLcontext * ctx,
                 GLenum target, GLint level,
                 GLint internalFormat,
@@ -609,21 +603,24 @@ intelTexImage1D(GLcontext * ctx,
 {
    intelTexImage(ctx, 1, target, level,
                  internalFormat, width, 1, 1, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+                 format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
 }
 
-void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-				GLint internalFormat,
-				GLint width, GLint height, GLint border,
-				GLsizei imageSize, const GLvoid *data,
-				struct gl_texture_object *texObj,
-				struct gl_texture_image *texImage )
+
+static void
+intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                           GLint internalFormat,
+                           GLint width, GLint height, GLint border,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage )
 {
    intelTexImage(ctx, 2, target, level,
 		 internalFormat, width, height, 1, border,
-		 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1);
+		 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
 }
 
+
 /**
  * Need to map texture image into memory before copying image data,
  * then unmap it.
@@ -632,7 +629,7 @@ static void
 intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
 		    GLenum format, GLenum type, GLvoid * pixels,
 		    struct gl_texture_object *texObj,
-		    struct gl_texture_image *texImage, int compressed)
+		    struct gl_texture_image *texImage, GLboolean compressed)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_texture_image *intelImage = intel_texture_image(texImage);
@@ -686,28 +683,29 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
    }
 }
 
-void
+
+static void
 intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
                  GLenum format, GLenum type, GLvoid * pixels,
                  struct gl_texture_object *texObj,
                  struct gl_texture_image *texImage)
 {
    intel_get_tex_image(ctx, target, level, format, type, pixels,
-		       texObj, texImage, 0);
-
-
+		       texObj, texImage, GL_FALSE);
 }
 
-void
+
+static void
 intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
 			   GLvoid *pixels,
 			   struct gl_texture_object *texObj,
 			   struct gl_texture_image *texImage)
 {
    intel_get_tex_image(ctx, target, level, 0, 0, pixels,
-		       texObj, texImage, 1);
+		       texObj, texImage, GL_TRUE);
 }
 
+
 void
 intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
 		  unsigned long long offset, GLint depth, GLuint pitch)
@@ -816,3 +814,16 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
     */
    intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
 }
+
+/* Install this file's TexImage1D/2D/3D, GetTexImage and compressed 2D hooks into the table. */
+void
+intelInitTextureImageFuncs(struct dd_function_table *functions)
+{
+   functions->TexImage1D = intelTexImage1D;
+   functions->TexImage2D = intelTexImage2D;
+   functions->TexImage3D = intelTexImage3D;
+   functions->GetTexImage = intelGetTexImage;
+
+   functions->CompressedTexImage2D = intelCompressedTexImage2D;
+   functions->GetCompressedTexImage = intelGetCompressedTexImage;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c
index e6f9a41779..7d69ea4484 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c
@@ -35,26 +35,39 @@
 #include "intel_context.h"
 #include "main/macros.h"
 
-GLuint intel_compressed_alignment(GLenum internalFormat)
+void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
 {
-    GLuint alignment = 4;
-
     switch (internalFormat) {
     case GL_COMPRESSED_RGB_FXT1_3DFX:
     case GL_COMPRESSED_RGBA_FXT1_3DFX:
-        alignment = 8;
+        *w = 8;
+        *h = 4;
+        break;
+
+    case GL_RGB_S3TC:
+    case GL_RGB4_S3TC:
+    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+    case GL_RGBA_S3TC:
+    case GL_RGBA4_S3TC:
+    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+        *w = 4;
+        *h = 4;
         break;
 
     default:
+        *w = 4;
+        *h = 2;
         break;
     }
-
-    return alignment;
 }
 
-void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
+void i945_miptree_layout_2d( struct intel_context *intel,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling )
 {
-   GLint align_h = 2, align_w = 4;
+   GLuint align_h = 2, align_w = 4;
    GLuint level;
    GLuint x = 0;
    GLuint y = 0;
@@ -62,9 +75,9 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
    GLuint height = mt->height0;
 
    mt->pitch = mt->width0;
+   intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
 
    if (mt->compressed) {
-       align_w = intel_compressed_alignment(mt->internal_format);
        mt->pitch = ALIGN(mt->width0, align_w);
    }
 
@@ -92,7 +105,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
    /* Pitch must be a whole number of dwords, even though we
     * express it in texels.
     */
-   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
    mt->total_height = 0;
 
    for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h
index dbc90e6f9b..c9de9b5678 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h
@@ -38,5 +38,7 @@ static GLuint minify( GLuint d )
    return MAX2(1, d>>1);
 }
 
-extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
-extern GLuint intel_compressed_alignment(GLenum);
+extern void i945_miptree_layout_2d(struct intel_context *intel,
+				   struct intel_mipmap_tree *mt,
+				   uint32_t tiling);
+extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index f86de56897..89037073f8 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx,
                  GLenum target, GLint level,
                  GLint xoffset, GLint yoffset, GLint zoffset,
                  GLint width, GLint height, GLint depth,
+                 GLsizei imageSize,
                  GLenum format, GLenum type, const void *pixels,
                  const struct gl_pixelstore_attrib *packing,
                  struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage)
+                 struct gl_texture_image *texImage,
+                 GLboolean compressed)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_texture_image *intelImage = intel_texture_image(texImage);
@@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx,
 
    intelFlush(ctx);
 
-   pixels =
-      _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
-                                  type, pixels, packing, "glTexSubImage2D");
+   if (compressed) /* presumably the last arg names the GL entry point in PBO errors */
+      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize,
+                                                      pixels, packing,
+                                                      "glCompressedTexSubImage");
+   else
+      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+                                           format, type, pixels, packing,
+                                           "glTexSubImage");
    if (!pixels)
       return;
 
@@ -90,20 +97,28 @@ intelTexSubimage(GLcontext * ctx,
 
    assert(dstRowStride);
 
-   if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
-                                        texImage->TexFormat,
-                                        texImage->Data,
-                                        xoffset, yoffset, zoffset,
-                                        dstRowStride,
-                                        texImage->ImageOffsets,
-                                        width, height, depth,
-                                        format, type, pixels, packing)) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+   if (compressed) {
+      if (intelImage->mt) {
+         struct intel_region *dst = intelImage->mt->region;
+         /* NOTE(review): y/4 and width rounded to 4 assume 4x4 blocks; FXT1 is 8x4 - confirm. */
+         _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+                         xoffset, yoffset / 4,
+                         (width + 3)  & ~3, (height + 3) / 4,
+                         pixels, (width + 3) & ~3, 0, 0);
+      } else /* NOTE(review): this copy does not use xoffset/yoffset - confirm intended. */
+        memcpy(texImage->Data, pixels, imageSize);
    }
-
-   /* GL_SGIS_generate_mipmap */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      intel_generate_mipmap(ctx, target, texObj);
+   else {
+      if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+                                           texImage->TexFormat,
+                                           texImage->Data,
+                                           xoffset, yoffset, zoffset,
+                                           dstRowStride,
+                                           texImage->ImageOffsets,
+                                           width, height, depth,
+                                           format, type, pixels, packing)) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+      }
    }
 
    _mesa_unmap_teximage_pbo(ctx, packing);
@@ -114,13 +129,15 @@ intelTexSubimage(GLcontext * ctx,
    }
 
    UNLOCK_HARDWARE(intel);
-}
-
-
 
+   /* GL_SGIS_generate_mipmap: on a base-level update; called after UNLOCK_HARDWARE. */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
+}
 
 
-void
+static void
 intelTexSubImage3D(GLcontext * ctx,
                    GLenum target,
                    GLint level,
@@ -132,18 +149,15 @@ intelTexSubImage3D(GLcontext * ctx,
                    struct gl_texture_object *texObj,
                    struct gl_texture_image *texImage)
 {
-
    intelTexSubimage(ctx, 3,
                     target, level,
                     xoffset, yoffset, zoffset,
-                    width, height, depth,
-                    format, type, pixels, packing, texObj, texImage);
-
+                    width, height, depth, 0,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
 }
 
 
-
-void
+static void
 intelTexSubImage2D(GLcontext * ctx,
                    GLenum target,
                    GLint level,
@@ -155,17 +169,15 @@ intelTexSubImage2D(GLcontext * ctx,
                    struct gl_texture_object *texObj,
                    struct gl_texture_image *texImage)
 {
-
    intelTexSubimage(ctx, 2,
                     target, level,
                     xoffset, yoffset, 0,
-                    width, height, 1,
-                    format, type, pixels, packing, texObj, texImage);
-
+                    width, height, 1, 0,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
 }
 
 
-void
+static void
 intelTexSubImage1D(GLcontext * ctx,
                    GLenum target,
                    GLint level,
@@ -180,12 +192,11 @@ intelTexSubImage1D(GLcontext * ctx,
    intelTexSubimage(ctx, 1,
                     target, level,
                     xoffset, 0, 0,
-                    width, 1, 1,
-                    format, type, pixels, packing, texObj, texImage);
-
+                    width, 1, 1, 0,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
 }
 
-void
+static void
 intelCompressedTexSubImage2D(GLcontext * ctx,
 			     GLenum target,
 			     GLint level,
@@ -196,6 +207,20 @@ intelCompressedTexSubImage2D(GLcontext * ctx,
 			     struct gl_texture_object *texObj,
 			     struct gl_texture_image *texImage)
 {
-   fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n",
-	   width, height, xoffset, yoffset);
+   intelTexSubimage(ctx, 2,
+                    target, level,
+                    xoffset, yoffset, 0,
+                    width, height, 1, imageSize,
+                    format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE);
+}
+
+
+/* Install this file's TexSubImage1D/2D/3D and CompressedTexSubImage2D hooks into the table. */
+void
+intelInitTextureSubImageFuncs(struct dd_function_table *functions)
+{
+   functions->TexSubImage1D = intelTexSubImage1D;
+   functions->TexSubImage2D = intelTexSubImage2D;
+   functions->TexSubImage3D = intelTexSubImage3D;
+   functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 05a375e1f3..a284d5475f 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -199,6 +199,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
    if (!intelObj->mt) {
       intelObj->mt = intel_miptree_create(intel,
                                           intelObj->base.Target,
+                                          firstImage->base._BaseFormat,
                                           firstImage->base.InternalFormat,
                                           intelObj->firstLevel,
                                           intelObj->lastLevel,
@@ -241,7 +242,7 @@ intel_tex_map_level_images(struct intel_context *intel,
       struct intel_texture_image *intelImage =
 	 intel_texture_image(intelObj->base.Image[face][level]);
 
-      if (intelImage->mt) {
+      if (intelImage && intelImage->mt) {
 	 intelImage->base.Data =
 	    intel_miptree_image_map(intel,
 				    intelImage->mt,
@@ -268,7 +269,7 @@ intel_tex_unmap_level_images(struct intel_context *intel,
       struct intel_texture_image *intelImage =
 	 intel_texture_image(intelObj->base.Image[face][level]);
 
-      if (intelImage->mt) {
+      if (intelImage && intelImage->mt) {
 	 intel_miptree_image_unmap(intel, intelImage->mt);
 	 intelImage->base.Data = NULL;
       }