28 files changed, 2146 insertions, 717 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 373d332f85..479c30669c 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -80,18 +80,22 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 
    if (dPriv && dPriv->numClipRects) {
       struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-      const struct intel_region *frontRegion
-	 = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
-      const struct intel_region *backRegion
-	 = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
-      const int nbox = dPriv->numClipRects;
-      const drm_clip_rect_t *pbox = dPriv->pClipRects;
-      const int cpp = frontRegion->cpp;
-      int src_pitch = backRegion->pitch * cpp;
-      int dst_pitch = frontRegion->pitch * cpp;
+      struct intel_region *src, *dst;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *pbox = dPriv->pClipRects;
+      int cpp;
+      int src_pitch, dst_pitch;
       int BR13, CMD;
       int i;
 
+      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
+      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
+
+      src_pitch = src->pitch * src->cpp;
+      dst_pitch = dst->pitch * dst->cpp;
+
+      cpp = src->cpp;
+
       ASSERT(intel_fb);
       ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
       ASSERT(frontRegion);
@@ -109,40 +113,30 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
       }
 
 #ifndef I915
-      if (backRegion->tiled) {
+      if (src->tiled) {
 	 CMD |= XY_SRC_TILED;
 	 src_pitch /= 4;
       }
-      if (frontRegion->tiled) {
+      if (dst->tiled) {
 	 CMD |= XY_DST_TILED;
 	 dst_pitch /= 4;
       }
 #endif
 
       for (i = 0; i < nbox; i++, pbox++) {
-	 drm_clip_rect_t box;
-
-	 if (pbox->x1 >= pbox->x2 ||
-	     pbox->y1 >= pbox->y2 ||
-	     pbox->x2 > intelScreen->width || pbox->y2 > intelScreen->height)
-	    continue;
-
-	 box = *pbox;
+	 drm_clip_rect_t box = *pbox;
 
 	 if (rect) {
-	    if (rect->x1 > box.x1)
-	       box.x1 = rect->x1;
-	    if (rect->y1 > box.y1)
-	       box.y1 = rect->y1;
-	    if (rect->x2 < box.x2)
-	       box.x2 = rect->x2;
-	    if (rect->y2 < box.y2)
-	       box.y2 = rect->y2;
-
-	    if (box.x1 >= box.x2 || box.y1 >= box.y2)
+	    if (!intel_intersect_cliprects(&box, &box, rect))
 	       continue;
 	 }
 
+	 if (box.x1 >= box.x2 ||
+	     box.y1 >= box.y2 ||
+	     box.x2 > intelScreen->width ||
+	     box.y2 > intelScreen->height)
+	    continue;
+
 	 assert(box.x1 < box.x2);
 	 assert(box.y1 < box.y2);
 
@@ -152,13 +146,10 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);
 
-	 OUT_RELOC(frontRegion->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		   0);
+	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(backRegion->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		   0);
-
+	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
 	 ADVANCE_BATCH();
       }
 
@@ -178,12 +169,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 
 void
 intelEmitFillBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort dst_pitch,
-                  dri_bo *dst_buffer,
-                  GLuint dst_offset,
+		  GLuint cpp,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
 		  GLboolean dst_tiled,
-                  GLshort x, GLshort y, GLshort w, GLshort h, GLuint color)
+		  GLshort x, GLshort y,
+		  GLshort w, GLshort h,
+		  GLuint color)
 {
    GLuint BR13, CMD;
    BATCH_LOCALS;
@@ -227,7 +220,6 @@ intelEmitFillBlit(struct intel_context *intel,
    ADVANCE_BATCH();
 }
 
-
 static GLuint translate_raster_op(GLenum logicop)
 {
    switch(logicop) {
@@ -256,17 +248,17 @@ static GLuint translate_raster_op(GLenum logicop)
  */
 void
 intelEmitCopyBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort src_pitch,
-                  dri_bo *src_buffer,
-                  GLuint src_offset,
+		  GLuint cpp,
+		  GLshort src_pitch,
+		  dri_bo *src_buffer,
+		  GLuint src_offset,
 		  GLboolean src_tiled,
-                  GLshort dst_pitch,
-                  dri_bo *dst_buffer,
-                  GLuint dst_offset,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
 		  GLboolean dst_tiled,
-                  GLshort src_x, GLshort src_y,
-                  GLshort dst_x, GLshort dst_y, 
+		  GLshort src_x, GLshort src_y,
+		  GLshort dst_x, GLshort dst_y,
 		  GLshort w, GLshort h,
 		  GLenum logic_op)
 {
@@ -284,7 +276,7 @@ intelEmitCopyBlit(struct intel_context *intel,
    src_pitch *= cpp;
    dst_pitch *= cpp;
 
-   BR13 = (translate_raster_op(logic_op) << 16);
+   BR13 = translate_raster_op(logic_op) << 16;
 
    switch (cpp) {
    case 1:
@@ -316,6 +308,8 @@ intelEmitCopyBlit(struct intel_context *intel,
       return;
    }
 
+   dst_pitch &= 0xffff;
+   src_pitch &= 0xffff;
 
    /* Initial y values don't seem to work with negative pitches.  If
     * we adjust the offsets manually (below), it seems to work fine.
@@ -333,10 +327,12 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH(BR13 | dst_pitch);
       OUT_BATCH((dst_y << 16) | dst_x);
       OUT_BATCH((dst_y2 << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		dst_offset);
       OUT_BATCH((src_y << 16) | src_x);
       OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset);
+      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		src_offset);
       ADVANCE_BATCH();
    }
    else {
@@ -349,11 +345,11 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH((0 << 16) | dst_x);
       OUT_BATCH((h << 16) | dst_x2);
       OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-                dst_offset + dst_y * dst_pitch);
+		dst_offset + dst_y * dst_pitch);
       OUT_BATCH((0 << 16) | src_x);
       OUT_BATCH(src_pitch);
       OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-                src_offset + src_y * src_pitch);
+		src_offset + src_y * src_pitch);
       ADVANCE_BATCH();
    }
 }
@@ -367,7 +363,7 @@ intelEmitCopyBlit(struct intel_context *intel,
  * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
  */
 void
-intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
+intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
 {
    struct intel_context *intel = intel_context(ctx);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -375,8 +371,6 @@ intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
    GLbitfield skipBuffers = 0;
    BATCH_LOCALS;
 
-   DBG("%s %x\n", __FUNCTION__, mask);
-
    /*
     * Compute values for clearing the buffers.
     */
@@ -542,3 +536,77 @@ intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
 
    UNLOCK_HARDWARE(intel);
 }
+
+/**
+ * Emit XY_SETUP_BLT_CMD followed by XY_TEXT_IMMEDIATE_BLIT_CMD, with
+ * fg_color as foreground and the src_bits bitmap appended to the batch as
+ * immediate data, covering (x, y) to (x + w, y + h) in dst_buffer.
+ * src_size is in bytes; ALIGN(src_size, 8) bytes are read from src_bits
+ * (NOTE(review): presumably callers pad the bitmap to that size - confirm).
+ */
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color, GLshort dst_pitch,
+				  dri_bo *dst_buffer, GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y, GLshort w, GLshort h,
+				  GLenum logic_op)
+{
+   int dwords = ALIGN(src_size, 8) / 4;
+   uint32_t opcode, br13, blit_cmd;
+
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+
+   if (w < 0 || h < 0)
+      return;
+
+   dst_pitch *= cpp;
+
+   if (dst_tiled)
+      dst_pitch /= 4;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+
+   /* Reserve bytes: 8 setup dwords + 3 blit dwords + the inline bitmap. */
+   intel_batchbuffer_require_space(intel->batch,
+				   (8 * 4) + (3 * 4) + dwords * 4,
+				   INTEL_BATCH_NO_CLIPRECTS);
+
+   opcode = XY_SETUP_BLT_CMD;
+   if (cpp == 4)
+      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+   if (dst_tiled)
+      opcode |= XY_DST_TILED;
+
+   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
+   if (cpp == 2)
+      br13 |= BR13_565;
+   else
+      br13 |= BR13_8888;
+
+   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
+   if (dst_tiled)
+      blit_cmd |= XY_DST_TILED;
+
+   BEGIN_BATCH(8 + 3, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(opcode);
+   OUT_BATCH(br13);
+   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
+   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
+   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_BATCH(0); /* bg */
+   OUT_BATCH(fg_color); /* fg */
+   OUT_BATCH(0); /* pattern base addr */
+
+   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
+   OUT_BATCH((y << 16) | x);
+   OUT_BATCH(((y + h) << 16) | (x + w));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_data(intel->batch, src_bits, dwords * 4,
+			  INTEL_BATCH_NO_CLIPRECTS);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
index 35cc8868d9..fc0620caba 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -61,5 +61,17 @@ extern void intelEmitFillBlit(struct intel_context *intel,
                               GLshort x, GLshort y,
                               GLshort w, GLshort h, GLuint color);
 
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  dri_bo *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index d93677bde2..89b489e28a 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -135,15 +135,23 @@ intel_bufferobj_data(GLcontext * ctx,
    if (intel_obj->region)
       intel_bufferobj_release_region(intel, intel_obj);
 
+   /* While it would seem to make sense to always reallocate the buffer here,
+    * since it should allow us better concurrency between rendering and
+    * map-cpu write-unmap, doing so was a minor (~10%) performance loss
+    * for both classic and TTM mode with openarena.  That may change with
+    * improved buffer manager algorithms.
+    */
    if (intel_obj->buffer != NULL && intel_obj->buffer->size != size) {
       dri_bo_unreference(intel_obj->buffer);
       intel_obj->buffer = NULL;
    }
+   if (size != 0) {
+      if (intel_obj->buffer == NULL)
+	 intel_bufferobj_alloc_buffer(intel, intel_obj);
 
-   intel_bufferobj_alloc_buffer(intel, intel_obj);
-
-   if (data != NULL)
-      dri_bo_subdata(intel_obj->buffer, 0, size, data);
+      if (data != NULL)
+	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
+   }
 }
 
 
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index db579a8ae4..7cecc3232d 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -1,4 +1,4 @@
- /**************************************************************************
+/**************************************************************************
  * 
  * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index 44a55bbed9..78ffa3c1f8 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -29,9 +29,9 @@
 #include "intel_context.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
+#include "intel_chipset.h"
 #include "intel_depthstencil.h"
 #include "intel_fbo.h"
-#include "intel_tris.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
@@ -41,7 +41,7 @@
 #include "framebuffer.h"
 #include "swrast/swrast.h"
 #include "vblank.h"
-
+#include "i915_drm.h"
 
 /* This block can be removed when libdrm >= 2.3.1 is required */
 
@@ -59,6 +59,7 @@ typedef struct drm_i915_flip {
 
 #endif
 
+#define FILE_DEBUG_FLAG DEBUG_BLIT
 
 /**
  * XXX move this into a new dri/common/cliprects.c file.
@@ -196,6 +197,77 @@ intelSetBackClipRects(struct intel_context *intel)
    }
 }
 
+#ifdef I915
+static void
+intelUpdatePageFlipping(struct intel_context *intel,
+			GLint areaA, GLint areaB)
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   GLboolean pf_active;
+   GLint pf_planes;
+
+   /* Update page flipping info; bit 0 tracks areaA, bit 1 tracks areaB */
+   pf_planes = 0;
+
+   if (areaA > 0)
+      pf_planes |= 1;
+
+   if (areaB > 0)
+      pf_planes |= 2;
+   /* 2-bit page field at bit 0 or 2, selected by the stored pf_planes */
+   intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
+				(intel_fb->pf_planes & 0x2)) & 0x3;
+
+   intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
+   /* Active only when every plane in pf_planes is set in sarea->pf_active */
+   pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;
+
+   if (INTEL_DEBUG & DEBUG_LOCK)
+      if (pf_active != intel_fb->pf_active)
+	 _mesa_printf("%s - Page flipping %sactive\n", __progname,
+		      pf_active ? "" : "in");
+
+   if (pf_active) {
+      /* Sync pages between planes if flipping on both at the same time */
+      if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
+	  (intel->sarea->pf_current_page & 0x3) !=
+	  (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
+	 drm_i915_flip_t flip;
+	 /* Rewrite the other 2-bit field to (our page - 1) mod pf_num_pages */
+	 if (intel_fb->pf_current_page ==
+	     (intel->sarea->pf_current_page & 0x3)) {
+	    /* XXX: This is ugly, but emitting two flips 'in a row' can cause
+	     * lockups for unknown reasons.
+	     */
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & 0x3;
+	    intel->sarea->pf_current_page |=
+	       ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+		intel_fb->pf_num_pages) << 2;
+
+	    flip.pipes = 0x2;
+	 } else {
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & (0x3 << 2);
+	    intel->sarea->pf_current_page |=
+	       (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+	       intel_fb->pf_num_pages;
+
+	    flip.pipes = 0x1;
+	 }
+
+	 drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
+      }
+
+      intel_fb->pf_planes = pf_planes;
+   }
+
+   intel_fb->pf_active = pf_active;
+   intel_flip_renderbuffers(intel_fb);
+   intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+}
+#endif /* I915 */
 
 /**
  * This will be called whenever the currently bound window is moved/resized.
@@ -232,7 +304,7 @@ intelWindowMoved(struct intel_context *intel)
    }
 
    if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
-      drmI830Sarea *sarea = intel->sarea;
+      volatile drmI830Sarea *sarea = intel->sarea;
       drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
 				   .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
       drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
@@ -244,69 +316,10 @@ intelWindowMoved(struct intel_context *intel)
       GLint areaA = driIntersectArea( drw_rect, planeA_rect );
       GLint areaB = driIntersectArea( drw_rect, planeB_rect );
       GLuint flags = dPriv->vblFlags;
-      GLboolean pf_active;
-      GLint pf_planes;
-
-      /* Update page flipping info
-       */
-      pf_planes = 0;
-
-      if (areaA > 0)
-	 pf_planes |= 1;
-
-      if (areaB > 0)
-	 pf_planes |= 2;
-
-      intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
-				   (intel_fb->pf_planes & 0x2)) & 0x3;
-
-      intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
-
-      pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;
-
-      if (INTEL_DEBUG & DEBUG_LOCK)
-	 if (pf_active != intel_fb->pf_active)
-	    _mesa_printf("%s - Page flipping %sactive\n", __progname,
-			 pf_active ? "" : "in");
-
-      if (pf_active) {
-	 /* Sync pages between planes if flipping on both at the same time */
-	 if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
-	     (intel->sarea->pf_current_page & 0x3) !=
-	     (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
-	    drm_i915_flip_t flip;
-
-	    if (intel_fb->pf_current_page ==
-		(intel->sarea->pf_current_page & 0x3)) {
-	       /* XXX: This is ugly, but emitting two flips 'in a row' can cause
-		* lockups for unknown reasons.
-		*/
-               intel->sarea->pf_current_page =
-		  intel->sarea->pf_current_page & 0x3;
-	       intel->sarea->pf_current_page |=
-		  ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
-		   intel_fb->pf_num_pages) << 2;
-
-	       flip.pipes = 0x2;
-	    } else {
-               intel->sarea->pf_current_page =
-		  intel->sarea->pf_current_page & (0x3 << 2);
-	       intel->sarea->pf_current_page |=
-		  (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
-		  intel_fb->pf_num_pages;
-
-	       flip.pipes = 0x1;
-	    }
-
-	    drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
-	 }
-
-	 intel_fb->pf_planes = pf_planes;
-      }
 
-      intel_fb->pf_active = pf_active;
-      intel_flip_renderbuffers(intel_fb);
-      intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+#ifdef I915
+      intelUpdatePageFlipping(intel, areaA, areaB);
+#endif
 
       /* Update vblank info
        */
@@ -370,11 +383,14 @@ intelWindowMoved(struct intel_context *intel)
    intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */
 
    /* Update hardware scissor */
-   ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-                       ctx->Scissor.Width, ctx->Scissor.Height);
+   if (ctx->Driver.Scissor != NULL) {
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   }
 
    /* Re-calculate viewport related state */
-   ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
+   if (ctx->Driver.DepthRange != NULL)
+      ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
 }
 
 
@@ -389,9 +405,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLuint buf;
 
-   if (INTEL_DEBUG & DEBUG_BLIT)
-      _mesa_printf("%s 0x%x\n", __FUNCTION__, mask);
-
    intel->vtbl.install_meta_state(intel);
 
    /* Back and stencil cliprects are the same.  Try and do both
@@ -402,8 +415,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
 	 intel_get_rb_region(fb, BUFFER_BACK_LEFT);
       struct intel_region *depthRegion =
 	 intel_get_rb_region(fb, BUFFER_DEPTH);
-      const GLuint clearColor = (backRegion && backRegion->cpp == 4)
-	 ? intel->ClearColor8888 : intel->ClearColor565;
 
       intel->vtbl.meta_draw_region(intel, backRegion, depthRegion);
 
@@ -424,13 +435,14 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
       else
 	 intel->vtbl.meta_no_depth_write(intel);
 
-      intel_meta_draw_quad(intel,
-			   fb->_Xmin,
-			   fb->_Xmax,
-			   fb->_Ymin,
-			   fb->_Ymax,
-			   intel->ctx.Depth.Clear, clearColor,
-			   0, 0, 0, 0);   /* texcoords */
+      intel->vtbl.meta_draw_quad(intel,
+				 fb->_Xmin,
+				 fb->_Xmax,
+				 fb->_Ymin,
+				 fb->_Ymax,
+				 intel->ctx.Depth.Clear,
+				 intel->ClearColor8888,
+				 0, 0, 0, 0);   /* texcoords */
 
       mask &= ~(BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH);
    }
@@ -441,8 +453,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
       if (mask & bufBit) {
 	 struct intel_renderbuffer *irbColor =
 	    intel_renderbuffer(fb->Attachment[buf].Renderbuffer);
-	 GLuint color = (irbColor->region->cpp == 4)
-	    ? intel->ClearColor8888 : intel->ClearColor565;
 
 	 ASSERT(irbColor);
 
@@ -454,13 +464,13 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
 	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
 	  * drawing origin may not be correctly emitted.
 	  */
-	 intel_meta_draw_quad(intel,
-			      fb->_Xmin,
-			      fb->_Xmax,
-			      fb->_Ymin,
-			      fb->_Ymax,
-			      0, color,
-			      0, 0, 0, 0);   /* texcoords */
+	 intel->vtbl.meta_draw_quad(intel,
+				    fb->_Xmin,
+				    fb->_Xmax,
+				    fb->_Ymin,
+				    fb->_Ymax,
+				    0, intel->ClearColor8888,
+				    0, 0, 0, 0);   /* texcoords */
 
 	 mask &= ~bufBit;
       }
@@ -470,6 +480,28 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
    intel_batchbuffer_flush(intel->batch);
 }
 
+static const char *buffer_names[] = {	/* printable names, indexed by BUFFER_* */
+   [BUFFER_FRONT_LEFT] = "front",
+   [BUFFER_BACK_LEFT] = "back",
+   [BUFFER_FRONT_RIGHT] = "front right",
+   [BUFFER_BACK_RIGHT] = "back right",
+   [BUFFER_AUX0] = "aux0",
+   [BUFFER_AUX1] = "aux1",
+   [BUFFER_AUX2] = "aux2",
+   [BUFFER_AUX3] = "aux3",
+   [BUFFER_DEPTH] = "depth",
+   [BUFFER_STENCIL] = "stencil",
+   [BUFFER_ACCUM] = "accum",
+   [BUFFER_COLOR0] = "color0",
+   [BUFFER_COLOR1] = "color1",
+   [BUFFER_COLOR2] = "color2",
+   [BUFFER_COLOR3] = "color3",
+   [BUFFER_COLOR4] = "color4",
+   [BUFFER_COLOR5] = "color5",
+   [BUFFER_COLOR6] = "color6",
+   [BUFFER_COLOR7] = "color7",
+};	/* NOTE(review): gaps between designated indices would be NULL - verify */
+
 /**
  * Called by ctx->Driver.Clear.
  */
@@ -504,8 +536,12 @@ intelClear(GLcontext *ctx, GLbitfield mask)
          = intel_get_rb_region(fb, BUFFER_STENCIL);
       if (stencilRegion) {
          /* have hw stencil */
-         if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
-            /* not clearing all stencil bits, so use triangle clearing */
+         if (IS_965(intel->intelScreen->deviceID) ||
+	     (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+	    /* We have to use the 3D engine if we're clearing a partial mask
+	     * of the stencil buffer, or if we're on a 965 which has a tiled
+	     * depth/stencil buffer in a layout we can't blit to.
+	     */
             tri_mask |= BUFFER_BIT_STENCIL;
          }
          else {
@@ -518,7 +554,8 @@ intelClear(GLcontext *ctx, GLbitfield mask)
    /* HW depth */
    if (mask & BUFFER_BIT_DEPTH) {
       /* clear depth with whatever method is used for stencil (see above) */
-      if (tri_mask & BUFFER_BIT_STENCIL)
+      if (IS_965(intel->intelScreen->deviceID) ||
+	  tri_mask & BUFFER_BIT_STENCIL)
          tri_mask |= BUFFER_BIT_DEPTH;
       else
          blit_mask |= BUFFER_BIT_DEPTH;
@@ -538,17 +575,43 @@ intelClear(GLcontext *ctx, GLbitfield mask)
       }
    }
 
-
    intelFlush(ctx);             /* XXX intelClearWithBlit also does this */
 
-   if (blit_mask)
+   if (blit_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("blit clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (blit_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       intelClearWithBlit(ctx, blit_mask);
+   }
 
-   if (tri_mask)
+   if (tri_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("tri clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (tri_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       intelClearWithTris(intel, tri_mask);
+   }
 
-   if (swrast_mask)
+   if (swrast_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("swrast clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (swrast_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       _swrast_Clear(ctx, swrast_mask);
+   }
 }
 
 
@@ -564,7 +627,8 @@ intel_wait_flips(struct intel_context *intel, GLuint batch_flags)
 			     BUFFER_BIT_FRONT_LEFT ? BUFFER_FRONT_LEFT :
 			     BUFFER_BACK_LEFT);
 
-   if (intel_fb->Base.Name == 0 && intel_rb->pf_pending == intel_fb->pf_seq) {
+   if (intel_fb->Base.Name == 0 && intel_rb &&
+       intel_rb->pf_pending == intel_fb->pf_seq) {
       GLint pf_planes = intel_fb->pf_planes;
       BATCH_LOCALS;
 
@@ -586,6 +650,7 @@ intel_wait_flips(struct intel_context *intel, GLuint batch_flags)
 static GLboolean
 intelPageFlip(const __DRIdrawablePrivate * dPriv)
 {
+#ifdef I915
    struct intel_context *intel;
    int ret;
    struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
@@ -638,6 +703,9 @@ intelPageFlip(const __DRIdrawablePrivate * dPriv)
    intel_draw_buffer(&intel->ctx, &intel_fb->Base);
 
    return GL_TRUE;
+#else
+   return GL_FALSE;
+#endif
 }
 
 #if 0
@@ -863,16 +931,14 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
    /*
     * How many color buffers are we drawing into?
     */
-   if (fb->_NumColorDrawBuffers[0] != 1
-#if 0
-       /* XXX FBO temporary - always use software rendering */
-       || 1
-#endif
-      ) {
+   if (fb->_NumColorDrawBuffers[0] != 1) {
       /* writing to 0 or 2 or 4 color buffers */
       /*_mesa_debug(ctx, "Software rendering\n");*/
       FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
-      front = 1;                /* might not have back color buffer */
+      colorRegion = NULL;
+
+      if (fb->Name != 0)
+	 intelSetRenderbufferClipRects(intel);
    }
    else {
       /* draw to exactly one color buffer */
@@ -881,30 +947,30 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
       if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
          front = 1;
       }
-   }
 
-   /*
-    * Get the intel_renderbuffer for the colorbuffer we're drawing into.
-    * And set up cliprects.
-    */
-   if (fb->Name == 0) {
-      /* drawing to window system buffer */
-      if (front) {
-         intelSetFrontClipRects(intel);
-         colorRegion = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+      /*
+       * Get the intel_renderbuffer for the colorbuffer we're drawing into.
+       * And set up cliprects.
+       */
+      if (fb->Name == 0) {
+	 /* drawing to window system buffer */
+	 if (front) {
+	    intelSetFrontClipRects(intel);
+	    colorRegion = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+	 }
+	 else {
+	    intelSetBackClipRects(intel);
+	    colorRegion = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+	 }
       }
       else {
-         intelSetBackClipRects(intel);
-         colorRegion = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+	 /* drawing to user-created FBO */
+	 struct intel_renderbuffer *irb;
+	 intelSetRenderbufferClipRects(intel);
+	 irb = intel_renderbuffer(fb->_ColorDrawBuffers[0][0]);
+	 colorRegion = (irb && irb->region) ? irb->region : NULL;
       }
    }
-   else {
-      /* drawing to user-created FBO */
-      struct intel_renderbuffer *irb;
-      intelSetRenderbufferClipRects(intel);
-      irb = intel_renderbuffer(fb->_ColorDrawBuffers[0][0]);
-      colorRegion = (irb && irb->region) ? irb->region : NULL;
-   }
 
    /* Update culling direction which changes depending on the
     * orientation of the buffer:
@@ -953,7 +1019,10 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
          ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
          FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
          /* need to re-compute stencil hw state */
-         ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+	 if (ctx->Driver.Enable != NULL)
+	    ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+	 else
+	    ctx->NewState |= _NEW_STENCIL;
          if (!depthRegion)
             depthRegion = irbStencil->region;
       }
@@ -965,42 +1034,46 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
       /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */
       FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
       /* need to re-compute stencil hw state */
-      ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+      if (ctx->Driver.Enable != NULL)
+	 ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+      else
+	 ctx->NewState |= _NEW_STENCIL;
    }
 
    /*
     * Update depth test state
     */
-   if (ctx->Depth.Test && fb->Visual.depthBits > 0) {
-      ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE);
-   }
-   else {
-      ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE);
-   }
-
-   /**
-    ** Release old regions, reference new regions
-    **/
-#if 0                           /* XXX FBO: this seems to be redundant with i915_state_draw_region() */
-   if (intel->draw_region != colorRegion) {
-      intel_region_release(&intel->draw_region);
-      intel_region_reference(&intel->draw_region, colorRegion);
-   }
-   if (intel->intelScreen->depth_region != depthRegion) {
-      intel_region_release(&intel->intelScreen->depth_region);
-      intel_region_reference(&intel->intelScreen->depth_region, depthRegion);
+   if (ctx->Driver.Enable) {
+      if (ctx->Depth.Test && fb->Visual.depthBits > 0) {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      } else {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE);
+      }
+   } else {
+      ctx->NewState |= _NEW_DEPTH;
    }
-#endif
 
    intel->vtbl.set_draw_region(intel, colorRegion, depthRegion);
 
    /* update viewport since it depends on window size */
-   ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
-                        ctx->Viewport.Width, ctx->Viewport.Height);
+   if (ctx->Driver.Viewport) {
+      ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+			   ctx->Viewport.Width, ctx->Viewport.Height);
+   } else {
+      ctx->NewState |= _NEW_VIEWPORT;
+   }
 
-   /* Update hardware scissor */
-   ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-                       ctx->Scissor.Width, ctx->Scissor.Height);
+   /* Set state we know depends on drawable parameters:
+    */
+   if (ctx->Driver.Scissor)
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   intel->NewGLState |= _NEW_SCISSOR;
+
+   if (ctx->Driver.DepthRange)
+      ctx->Driver.DepthRange(ctx,
+			     ctx->Viewport.Near,
+			     ctx->Viewport.Far);
 }
 
 
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
index 2ec0241af4..b2aa056b90 100644
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@@ -48,138 +48,138 @@
 
 #include "intel_bufmgr_ttm.h"
 
-#define BUFMGR_DEBUG 0
-
-struct intel_reloc_info
-{
-    GLuint type;
-    GLuint reloc;
-    GLuint delta;
-    GLuint index;
-    drm_handle_t handle;
-};
-
-struct intel_bo_node
-{
-    drmMMListHead head;
-    drmBO *buf;
-    struct drm_i915_op_arg bo_arg;
-    uint64_t flags;
-    uint64_t mask;
-    void (*destroy)(void *);
-    void *priv;
-};
-
-struct intel_bo_reloc_list
-{
-    drmMMListHead head;
-    drmBO buf;
-    uint32_t *relocs;
-};
-
-struct intel_bo_reloc_node
-{
-    drmMMListHead head;
-    drm_handle_t handle;
-    uint32_t nr_reloc_types;
-    struct intel_bo_reloc_list type_list;
-};
+#define DBG(...) do {					\
+   if (bufmgr_ttm->bufmgr.debug)			\
+      _mesa_printf(__VA_ARGS__);			\
+} while (0)
 
+/* Buffer validation list */
 struct intel_bo_list {
     unsigned numCurrent;
     drmMMListHead list;
-    void (*destroy)(void *node);
 };
 
 typedef struct _dri_bufmgr_ttm {
     dri_bufmgr bufmgr;
 
     int fd;
-    _glthread_Mutex mutex;
     unsigned int fence_type;
     unsigned int fence_type_flush;
 
     uint32_t max_relocs;
-    /** ttm relocation list */
-    struct intel_bo_list list;
-    struct intel_bo_list reloc_list;
-
+    struct intel_bo_list list; /* list of buffers to be validated */
 } dri_bufmgr_ttm;
 
 typedef struct _dri_bo_ttm {
     dri_bo bo;
 
-    int refcount;		/* Protected by bufmgr->mutex */
+    int refcount;
     drmBO drm_bo;
     const char *name;
+
+    /** DRM buffer object containing relocation list */
+    drmBO *reloc_buf;
+    uint32_t *relocs;
 } dri_bo_ttm;
 
 typedef struct _dri_fence_ttm
 {
     dri_fence fence;
 
-    int refcount;		/* Protected by bufmgr->mutex */
+    int refcount;
     const char *name;
     drmFence drm_fence;
 } dri_fence_ttm;
 
+/* Validation list node */
+struct intel_bo_node
+{
+    drmMMListHead head;
+    dri_bo *bo;
+    struct drm_i915_op_arg bo_arg;
+    uint64_t flags;
+    uint64_t mask;
+};
 
 static void
-intel_bo_free_list(struct intel_bo_list *list)
+intel_init_validate_list(struct intel_bo_list *list)
 {
-    struct intel_bo_node *node;
+    DRMINITLISTHEAD(&list->list);
+    list->numCurrent = 0;
+}
+
+/**
+ * Empties the validation list and resets each buffer's relocation count.
+ */
+static void
+intel_free_validate_list(dri_bufmgr_ttm *bufmgr_ttm)
+{
+    struct intel_bo_list *list = &bufmgr_ttm->list;
     drmMMListHead *l;
 
-    l = list->list.next;
-    while(l != &list->list) {
+    for (l = list->list.next; l != &list->list; l = list->list.next) {
+        struct intel_bo_node *node =
+	   DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo;
+
 	DRMLISTDEL(l);
-	node = DRMLISTENTRY(struct intel_bo_node, l, head);
-	list->destroy(node);
-	l = list->list.next;
+
+	/* Clear relocation list */
+	if (bo_ttm->relocs != NULL)
+	   bo_ttm->relocs[0] = bo_ttm->relocs[0] & ~0xffff;
+
+	dri_bo_unreference(node->bo);
+
+	drmFree(node);
 	list->numCurrent--;
     }
 }
 
-static void
-generic_destroy(void *nodep)
+static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
 {
-    free(nodep);
-}
+    struct intel_bo_list *list = &bufmgr_ttm->list;
+    drmMMListHead *l;
+    int i = 0;
 
-static int
-intel_create_bo_list(int numTarget, struct intel_bo_list *list,
-		     void (*destroy)(void *))
-{
-    DRMINITLISTHEAD(&list->list);
-    list->numCurrent = 0;
-    if (destroy)
-        list->destroy = destroy;
-    else
-        list->destroy = generic_destroy;
-    return 0;
+    for (l = list->list.next; l != &list->list; l = l->next) {
+	int j;
+        struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo;
+
+	if (bo_ttm->relocs != NULL) {
+	    for (j = 0; j < (bo_ttm->relocs[0] & 0xffff); j++) {
+		uint32_t *reloc_entry = bo_ttm->relocs + I915_RELOC_HEADER +
+		    j * I915_RELOC0_STRIDE;
+
+		DBG("%2d: %s@0x%08x -> %d + 0x%08x\n",
+		    i, bo_ttm->name,
+		    reloc_entry[0], reloc_entry[2], reloc_entry[1]);
+	    }
+	} else {
+	    DBG("%2d: %s\n", i, bo_ttm->name);
+	}
+	i++;
+    }
 }
 
-
 static struct drm_i915_op_arg *
 intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
 {
     struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_node *node;
-    struct intel_bo_reloc_node *rl_node;
-    drmMMListHead *l, *rl;
-    struct drm_i915_op_arg *arg, *first;
-    struct drm_bo_op_req *req;
+    drmMMListHead *l;
+    struct drm_i915_op_arg *first;
     uint64_t *prevNext = NULL;
     GLuint count = 0;
 
     first = NULL;
 
     for (l = list->list.next; l != &list->list; l = l->next) {
-        node = DRMLISTENTRY(struct intel_bo_node, l, head);
-
-        arg = &node->bo_arg;
-        req = &arg->d.req;
+        struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *ttm_buf = (dri_bo_ttm *)node->bo;
+	struct drm_i915_op_arg *arg = &node->bo_arg;
+	struct drm_bo_op_req *req = &arg->d.req;
 
         if (!first)
             first = arg;
@@ -189,27 +189,22 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
 
 	memset(arg, 0, sizeof(*arg));
 	prevNext = &arg->next;
-	req->bo_req.handle = node->buf->handle;
+	req->bo_req.handle = ttm_buf->drm_bo.handle;
 	req->op = drm_bo_validate;
 	req->bo_req.flags = node->flags;
 	req->bo_req.hint = 0;
 #ifdef DRM_BO_HINT_PRESUMED_OFFSET
 	req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
-	req->bo_req.presumed_offset = ((dri_bo *) node->priv)->offset;
+	req->bo_req.presumed_offset = node->bo->offset;
 #endif
 	req->bo_req.mask = node->mask;
 	req->bo_req.fence_class = 0; /* Backwards compat. */
-	arg->reloc_handle = 0;
 
-	for (rl = reloc_list->list.next; rl != &reloc_list->list;
-	     rl = rl->next)
-	{
-	    rl_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
+	if (ttm_buf->reloc_buf != NULL)
+	    arg->reloc_handle = ttm_buf->reloc_buf->handle;
+	else
+	    arg->reloc_handle = 0;
 
-	    if (rl_node->handle == node->buf->handle) {
-		arg->reloc_handle = rl_node->type_list.buf.handle;
-	    }
-	}
 	count++;
     }
 
@@ -220,46 +215,6 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
     return first;
 }
 
-static void
-intel_free_validate_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
-    struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_node *node;
-    drmMMListHead *l;
-
-    for (l = list->list.next; l != &list->list; l = l->next) {
-        node = DRMLISTENTRY(struct intel_bo_node, l, head);
-
-	if (node->destroy)
-	    (*node->destroy)(node->priv);
-
-    }
-}
-
-static void
-intel_free_reloc_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_reloc_node *reloc_node;
-    drmMMListHead *rl, *tmp;
-
-    for (rl = reloc_list->list.next, tmp = rl->next; rl != &reloc_list->list;
-	 rl = tmp, tmp = rl->next)
-    {
-	reloc_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
-
-	DRMLISTDEL(rl);
-
-	if (reloc_node->nr_reloc_types > 1) {
-	    /* TODO */
-	}
-
-	drmBOUnmap(bufmgr_ttm->fd, &reloc_node->type_list.buf);
-	drmBOUnreference(bufmgr_ttm->fd, &reloc_node->type_list.buf);
-	free(reloc_node);
-    }
-}
-
 /**
  * Adds the given buffer to the list of buffers to be validated (moved into the
  * appropriate memory type) with the next batch submission.
@@ -268,24 +223,26 @@ intel_free_reloc_list(dri_bufmgr_ttm *bufmgr_ttm)
  * with the intersection of the memory type flags and the union of the
  * remaining flags.
  */
-static int
+static struct intel_bo_node *
 intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 			  dri_bo *buf,
 			  uint64_t flags, uint64_t mask,
-			  int *itemLoc, void (*destroy_cb)(void *))
+			  int *itemLoc)
 {
     struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_node *node, *cur;
+    struct intel_bo_node *cur;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
     drmMMListHead *l;
     int count = 0;
     int ret = 0;
-    drmBO *buf_bo = &((dri_bo_ttm *)buf)->drm_bo;
     cur = NULL;
 
     /* Find the buffer in the validation list if it's already there. */
     for (l = list->list.next; l != &list->list; l = l->next) {
-	node = DRMLISTENTRY(struct intel_bo_node, l, head);
-	if (node->buf->handle == buf_bo->handle) {
+	struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+
+	if (((dri_bo_ttm *)node->bo)->drm_bo.handle == ttm_buf->drm_bo.handle) {
 	    cur = node;
 	    break;
 	}
@@ -295,13 +252,12 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
     if (!cur) {
 	cur = drmMalloc(sizeof(*cur));
 	if (!cur) {
-	    return -ENOMEM;
+	    return NULL;
 	}
-	cur->buf = buf_bo;
-	cur->priv = buf;
+	cur->bo = buf;
+	dri_bo_reference(buf);
 	cur->flags = flags;
 	cur->mask = mask;
-	cur->destroy = destroy_cb;
 	ret = 1;
 
 	DRMLISTADDTAIL(&cur->head, &list->list);
@@ -314,21 +270,22 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 		    "%s: No shared memory types between "
 		    "0x%16llx and 0x%16llx\n",
 		    __FUNCTION__, cur->flags, flags);
-	    return -EINVAL;
+	    return NULL;
 	}
 	if (mask & cur->mask & ~DRM_BO_MASK_MEM  & (cur->flags ^ flags)) {
 	    fprintf(stderr,
 		    "%s: Incompatible flags between 0x%16llx and 0x%16llx "
 		    "(0x%16llx, 0x%16llx masks)\n",
 		    __FUNCTION__, cur->flags, flags, cur->mask, mask);
-	    return -EINVAL;
+	    return NULL;
 	}
 	cur->mask |= mask;
 	cur->flags = memFlags | ((cur->flags | flags) &
 				cur->mask & ~DRM_BO_MASK_MEM);
     }
     *itemLoc = count;
-    return ret;
+
+    return cur;
 }
 
 
@@ -336,132 +293,58 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 	sizeof(uint32_t))
 
 static int
-intel_create_new_reloc_type_list(dri_bufmgr_ttm *bufmgr_ttm,
-				 struct intel_bo_reloc_list *cur_type)
+intel_setup_reloc_list(dri_bo *bo)
 {
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
     int ret;
 
-    /* should allocate a drmBO here */
-    ret = drmBOCreate(bufmgr_ttm->fd, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs), 0,
+    /* If the buffer exists, then it was just created, or it was reinitialized
+     * at the last intel_free_validate_list().
+     */
+    if (bo_ttm->reloc_buf != NULL)
+       return 0;
+
+    bo_ttm->reloc_buf = malloc(sizeof(bo_ttm->drm_bo));
+
+    ret = drmBOCreate(bufmgr_ttm->fd,
+		      RELOC_BUF_SIZE(bufmgr_ttm->max_relocs), 0,
 		      NULL,
 		      DRM_BO_FLAG_MEM_LOCAL |
 		      DRM_BO_FLAG_READ |
 		      DRM_BO_FLAG_WRITE |
 		      DRM_BO_FLAG_MAPPABLE |
 		      DRM_BO_FLAG_CACHED,
-		      0, &cur_type->buf);
+		      0, bo_ttm->reloc_buf);
     if (ret) {
-	fprintf(stderr, "Failed to create relocation BO: %s\n",
-		strerror(-ret));
-	return ret;
+       fprintf(stderr, "Failed to create relocation BO: %s\n",
+	       strerror(-ret));
+       return ret;
     }
 
-    ret = drmBOMap(bufmgr_ttm->fd, &cur_type->buf,
+    ret = drmBOMap(bufmgr_ttm->fd, bo_ttm->reloc_buf,
 		   DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
-		   0, (void **)&cur_type->relocs);
+		   0, (void **)&bo_ttm->relocs);
     if (ret) {
-	fprintf(stderr, "Failed to map relocation BO: %s\n", strerror(-ret));
-	return ret;
-    }
-    return 0;
-}
-
-/**
- * Adds the relocation @reloc_info to the relocation list.
- */
-static int
-intel_add_validate_reloc(dri_bufmgr_ttm *bufmgr_ttm,
-			 struct intel_reloc_info *reloc_info)
-{
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_reloc_node *rl_node, *cur;
-    drmMMListHead *rl, *l;
-    int ret = 0;
-    uint32_t *reloc_start;
-    int num_relocs;
-    struct intel_bo_reloc_list *cur_type;
-
-    cur = NULL;
-
-    for (rl = reloc_list->list.next; rl != &reloc_list->list; rl = rl->next) {
-	rl_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
-	if (rl_node->handle == reloc_info->handle) {
-	    cur = rl_node;
-	    break;
-	}
-    }
-
-    if (!cur) {
-
-	cur = malloc(sizeof(*cur));
-	if (!cur)
-	    return -ENOMEM;
-
-	cur->nr_reloc_types = 1;
-	cur->handle = reloc_info->handle;
-	cur_type = &cur->type_list;
-
-	DRMINITLISTHEAD(&cur->type_list.head);
-	ret = intel_create_new_reloc_type_list(bufmgr_ttm, cur_type);
-	if (ret) {
-	    return -1;
-	}
-	DRMLISTADDTAIL(&cur->head, &reloc_list->list);
-
-	cur_type->relocs[0] = 0 | (reloc_info->type << 16);
-	cur_type->relocs[1] = 0; // next reloc buffer handle is 0
-
-    } else {
-	int found = 0;
-	if ((cur->type_list.relocs[0] >> 16) == reloc_info->type) {
-		cur_type = &cur->type_list;
-		found = 1;
-	} else {
-	    for (l = cur->type_list.head.next; l != &cur->type_list.head;
-		 l = l->next)
-	    {
-	        cur_type = DRMLISTENTRY(struct intel_bo_reloc_list, l, head);
-	        if (((cur_type->relocs[0] >> 16) & 0xffff) == reloc_info->type)
-	    	    found = 1;
-		break;
-	    }
-        }
-
-	/* didn't find the relocation type */
-	if (!found) {
-	    cur_type = malloc(sizeof(*cur_type));
-	    if (!cur_type) {
-		return -ENOMEM;
-	    }
-
-	    ret = intel_create_new_reloc_type_list(bufmgr_ttm, cur_type);
-	    DRMLISTADDTAIL(&cur_type->head, &cur->type_list.head);
-
-	    cur_type->relocs[0] = (reloc_info->type << 16);
-	    cur_type->relocs[1] = 0;
-
-	    cur->nr_reloc_types++;
-	}
+       fprintf(stderr, "Failed to map relocation BO: %s\n",
+	       strerror(-ret));
+       return ret;
     }
 
-    reloc_start = cur_type->relocs;
-
-    num_relocs = (reloc_start[0] & 0xffff);
+    /* Initialize the relocation list with the header:
+     * DWORD 0: relocation type, relocation count
+     * DWORD 1: handle to next relocation list (currently none)
+     * DWORD 2: unused
+     * DWORD 3: unused
+     */
+    bo_ttm->relocs[0] = I915_RELOC_TYPE_0 << 16;
+    bo_ttm->relocs[1] = 0;
+    bo_ttm->relocs[2] = 0;
+    bo_ttm->relocs[3] = 0;
 
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER] =
-       reloc_info->reloc;
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER + 1] =
-       reloc_info->delta;
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER + 2] =
-       reloc_info->index;
-    reloc_start[0]++;
-    if (((reloc_start[0] & 0xffff)) > (bufmgr_ttm->max_relocs)) {
-	return -ENOMEM;
-    }
     return 0;
 }
 
-
 #if 0
 int
 driFenceSignaled(DriFenceObject * fence, unsigned type)
@@ -472,9 +355,7 @@ driFenceSignaled(DriFenceObject * fence, unsigned type)
     if (fence == NULL)
 	return GL_TRUE;
 
-    _glthread_LOCK_MUTEX(fence->mutex);
     ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
-    _glthread_UNLOCK_MUTEX(fence->mutex);
     BM_CKFATAL(ret);
     return signaled;
 }
@@ -485,14 +366,12 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
 	      unsigned long size, unsigned int alignment,
 	      uint64_t location_mask)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_bo_ttm *ttm_buf;
     unsigned int pageSize = getpagesize();
     int ret;
     unsigned int flags, hint;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_buf = malloc(sizeof(*ttm_buf));
     if (!ttm_buf)
 	return NULL;
@@ -506,7 +385,7 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
     /* No hints we want to use. */
     hint = 0;
 
-    ret = drmBOCreate(ttm_bufmgr->fd, size, alignment / pageSize,
+    ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize,
 		      NULL, flags, hint, &ttm_buf->drm_bo);
     if (ret != 0) {
 	free(ttm_buf);
@@ -518,10 +397,10 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
     ttm_buf->bo.bufmgr = bufmgr;
     ttm_buf->name = name;
     ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf = NULL;
+    ttm_buf->relocs = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_create: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_create: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return &ttm_buf->bo;
 }
@@ -548,17 +427,15 @@ dri_bo *
 intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
 			      unsigned int handle)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_bo_ttm *ttm_buf;
     int ret;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_buf = malloc(sizeof(*ttm_buf));
     if (!ttm_buf)
 	return NULL;
 
-    ret = drmBOReference(ttm_bufmgr->fd, handle, &ttm_buf->drm_bo);
+    ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
     if (ret != 0) {
        fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
 	       name, handle, strerror(-ret));
@@ -571,11 +448,11 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
     ttm_buf->bo.bufmgr = bufmgr;
     ttm_buf->name = name;
     ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf = NULL;
+    ttm_buf->relocs = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_create_from_handle: %p %08x (%s)\n",
-	    &ttm_buf->bo, handle, ttm_buf->name);
-#endif
+    DBG("bo_create_from_handle: %p %08x (%s)\n",
+	&ttm_buf->bo, handle, ttm_buf->name);
 
     return &ttm_buf->bo;
 }
@@ -583,12 +460,9 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
 static void
 dri_ttm_bo_reference(dri_bo *buf)
 {
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
     dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ttm_buf->refcount++;
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static void
@@ -600,24 +474,25 @@ dri_ttm_bo_unreference(dri_bo *buf)
     if (!buf)
 	return;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     if (--ttm_buf->refcount == 0) {
 	int ret;
 
+	if (ttm_buf->reloc_buf) {
+	    drmBOUnmap(bufmgr_ttm->fd, ttm_buf->reloc_buf);
+	    drmBOUnreference(bufmgr_ttm->fd, ttm_buf->reloc_buf);
+	    free(ttm_buf->reloc_buf);
+	}
+
 	ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
 	if (ret != 0) {
 	    fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
 		    ttm_buf->name, strerror(-ret));
 	}
-#if BUFMGR_DEBUG
-	fprintf(stderr, "bo_unreference final: %p (%s)\n",
-		&ttm_buf->bo, ttm_buf->name);
-#endif
-	_glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
+	DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
 	free(buf);
 	return;
     }
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static int
@@ -635,9 +510,7 @@ dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
 
     assert(buf->virtual == NULL);
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
 }
@@ -657,9 +530,7 @@ dri_ttm_bo_unmap(dri_bo *buf)
 
     buf->virtual = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
 }
@@ -674,11 +545,9 @@ dri_fence *
 intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
 				drm_fence_arg_t *arg)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_fence_ttm *ttm_fence;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_fence = malloc(sizeof(*ttm_fence));
     if (!ttm_fence)
 	return NULL;
@@ -694,10 +563,8 @@ intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
     ttm_fence->name = name;
     ttm_fence->refcount = 1;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_create_from_handle: %p (%s)\n", &ttm_fence->fence,
-	    ttm_fence->name);
-#endif
+    DBG("fence_create_from_handle: %p (%s)\n",
+	&ttm_fence->fence, ttm_fence->name);
 
     return &ttm_fence->fence;
 }
@@ -709,13 +576,8 @@ dri_ttm_fence_reference(dri_fence *fence)
     dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ++fence_ttm->refcount;
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_reference: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
+    DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
 }
 
 static void
@@ -727,11 +589,8 @@ dri_ttm_fence_unreference(dri_fence *fence)
     if (!fence)
 	return;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_unreference: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
+    DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+
     if (--fence_ttm->refcount == 0) {
 	int ret;
 
@@ -741,11 +600,9 @@ dri_ttm_fence_unreference(dri_fence *fence)
 		    fence_ttm->name, strerror(-ret));
 	}
 
-	_glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 	free(fence);
 	return;
     }
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static void
@@ -755,19 +612,14 @@ dri_ttm_fence_wait(dri_fence *fence)
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
     int ret;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
     if (ret != 0) {
 	_mesa_printf("%s:%d: Error %d waiting for fence %s.\n",
 		     __FILE__, __LINE__, ret, fence_ttm->name);
 	abort();
     }
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_wait: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
+    DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
 }
 
 static void
@@ -775,52 +627,56 @@ dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
 {
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
 
-    intel_bo_free_list(&bufmgr_ttm->list);
-    intel_bo_free_list(&bufmgr_ttm->reloc_list);
+    intel_free_validate_list(bufmgr_ttm);
 
-    _glthread_DESTROY_MUTEX(bufmgr_ttm->mutex);
     free(bufmgr);
 }
 
-
-static void
-intel_dribo_destroy_callback(void *priv)
-{
-    dri_bo *dribo = priv;
-
-    if (dribo)
-	dri_bo_unreference(dribo);
-}
-
+/**
+ * Adds the target buffer to the validation list and adds the relocation
+ * to the reloc_buffer's relocation list.
+ *
+ * The relocation entry at the given offset must already contain the
+ * precomputed relocation value, because the kernel will optimize out
+ * the relocation entry write when the buffer hasn't moved from the
+ * last known offset in target_buf.
+ */
 static void
 dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
 		   GLuint offset, dri_bo *target_buf)
 {
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)reloc_buf;
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
-    int newItem;
-    struct intel_reloc_info reloc;
+    dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
+    struct intel_bo_node *node;
+    int index;
     int mask;
-    int ret;
+    int num_relocs;
+    uint32_t *this_reloc;
 
     mask = DRM_BO_MASK_MEM;
     mask |= flags & (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_EXE);
 
-    ret = intel_add_validate_buffer(bufmgr_ttm, target_buf, flags, mask,
-				    &newItem, intel_dribo_destroy_callback);
-    if (ret < 0)
-	return;
+    node = intel_add_validate_buffer(bufmgr_ttm, target_buf, flags, mask,
+				     &index);
+
+    intel_setup_reloc_list(reloc_buf);
 
-    if (ret == 1)
-	dri_bo_reference(target_buf);
+    num_relocs = (reloc_buf_ttm->relocs[0] & 0xffff);
 
-    reloc.type = I915_RELOC_TYPE_0;
-    reloc.reloc = offset;
-    reloc.delta = delta;
-    reloc.index = newItem;
-    reloc.handle = ttm_buf->drm_bo.handle;
+    /* Check overflow */
+    assert((reloc_buf_ttm->relocs[0] & 0xffff) < bufmgr_ttm->max_relocs);
 
-    intel_add_validate_reloc(bufmgr_ttm, &reloc);
+    this_reloc = reloc_buf_ttm->relocs + I915_RELOC_HEADER +
+	num_relocs * I915_RELOC0_STRIDE;
+
+    this_reloc[0] = offset;
+    this_reloc[1] = delta;
+    this_reloc[2] = index;
+    this_reloc[3] = 0;
+
+    reloc_buf_ttm->relocs[0]++; /* Increment relocation count */
+    /* Check wraparound */
+    assert((reloc_buf_ttm->relocs[0] & 0xffff) != 0);
 }
 
 
@@ -829,7 +685,7 @@ dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
 {
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
     void *ptr;
-    int itemLoc;
+    int index;
 
     /* Add the batch buffer to the validation list.  There are no relocations
      * pointing to it.
@@ -837,7 +693,7 @@ dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
     intel_add_validate_buffer(bufmgr_ttm, batch_buf,
 			      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
 			      DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
-			      &itemLoc, NULL);
+			      &index);
 
     ptr = intel_setup_validate_list(bufmgr_ttm, count);
 
@@ -857,7 +713,7 @@ intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
         node = DRMLISTENTRY(struct intel_bo_node, l, head);
 	arg = &node->bo_arg;
 	rep = &arg->d.rep;
-	((dri_bo *) node->priv)->offset = rep->bo_info.offset;
+	node->bo->offset = rep->bo_info.offset;
     }
 }
 
@@ -867,10 +723,11 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
 
     intel_update_buffer_offsets (bufmgr_ttm);
-    intel_free_validate_list(bufmgr_ttm);
-    intel_free_reloc_list(bufmgr_ttm);
 
-    intel_bo_free_list(&bufmgr_ttm->list);
+    if (bufmgr_ttm->bufmgr.debug)
+	dri_ttm_dump_validation_list(bufmgr_ttm);
+
+    intel_free_validate_list(bufmgr_ttm);
 }
 
 /**
@@ -892,13 +749,11 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
     bufmgr_ttm->fd = fd;
     bufmgr_ttm->fence_type = fence_type;
     bufmgr_ttm->fence_type_flush = fence_type_flush;
-    _glthread_INIT_MUTEX(bufmgr_ttm->mutex);
 
     /* lets go with one relocation per every four dwords - purely heuristic */
     bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 4;
 
-    intel_create_bo_list(10, &bufmgr_ttm->list, NULL);
-    intel_create_bo_list(1, &bufmgr_ttm->reloc_list, NULL);
+    intel_init_validate_list(&bufmgr_ttm->list);
 
     bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
     bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
@@ -913,6 +768,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
     bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
     bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
     bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
+    bufmgr_ttm->bufmgr.debug = GL_FALSE;
 
     return &bufmgr_ttm->bufmgr;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 2f49bf77ea..4fc4c96376 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -76,3 +76,9 @@
 				 devid == PCI_CHIP_Q33_G || \
 				 IS_965(devid))
 
+#define IS_945(devid)		(devid == PCI_CHIP_I945_G || \
+				 devid == PCI_CHIP_I945_GM || \
+				 devid == PCI_CHIP_I945_GME || \
+				 devid == PCI_CHIP_G33_G || \
+				 devid == PCI_CHIP_Q33_G || \
+				 devid == PCI_CHIP_Q35_G)
diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c
index a1a7ac9ffb..73f0fcd591 100644
--- a/src/mesa/drivers/dri/intel/intel_decode.c
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@@ -214,7 +214,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	instr_out(data, hw_offset, 2, "(%d,%d)\n",
 		  data[2] & 0xffff, data[2] >> 16);
 	instr_out(data, hw_offset, 3, "(%d,%d)\n",
-		  data[2] & 0xffff, data[2] >> 16);
+		  data[3] & 0xffff, data[3] >> 16);
 	instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
 	instr_out(data, hw_offset, 5, "color\n");
 	return len;
@@ -918,6 +918,26 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 
 	return len;
 
+    case 0x7900:
+	if (len != 4)
+	    fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+	if (count < 4)
+	    BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+		  data[1] & 0xffff,
+		  (data[1] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+		  data[2] & 0xffff,
+		  (data[2] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+		  (int)data[3] & 0xffff,
+		  ((int)data[3] >> 16) & 0xffff);
+
+	return len;
+
     case 0x7905:
 	if (len != 5)
 	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 034304f91c..8d75c63cef 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -493,7 +493,8 @@ intel_bind_framebuffer(GLcontext * ctx, GLenum target,
    if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
       intel_draw_buffer(ctx, fb);
       /* Integer depth range depends on depth buffer bits */
-      ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
+      if (ctx->Driver.DepthRange != NULL)
+	 ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
    }
    else {
       /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 0acf956a38..7637585033 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -28,6 +28,7 @@
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
+#include "intel_chipset.h"
 #include "enums.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -50,13 +51,15 @@ target_to_target(GLenum target)
 
 struct intel_mipmap_tree *
 intel_miptree_create(struct intel_context *intel,
-                     GLenum target,
-                     GLenum internal_format,
-                     GLuint first_level,
-                     GLuint last_level,
-                     GLuint width0,
-                     GLuint height0,
-                     GLuint depth0, GLuint cpp, GLuint compress_byte)
+		     GLenum target,
+		     GLenum internal_format,
+		     GLuint first_level,
+		     GLuint last_level,
+		     GLuint width0,
+		     GLuint height0,
+		     GLuint depth0,
+		     GLuint cpp,
+		     GLuint compress_byte)
 {
    GLboolean ok;
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
@@ -75,54 +78,19 @@ intel_miptree_create(struct intel_context *intel,
    mt->cpp = compress_byte ? compress_byte : cpp;
    mt->compressed = compress_byte ? 1 : 0;
    mt->refcount = 1; 
+   mt->pitch = 0;
 
-   switch (intel->intelScreen->deviceID) {
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q33_G:
-   case PCI_CHIP_Q35_G:
-      ok = i945_miptree_layout(mt);
-      break;
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
-   default:
-      /* All the i830 chips and the i915 use this layout:
-       */
-      ok = i915_miptree_layout(mt);
-      break;
-   }
+#ifdef I915
+   if (IS_945(intel->intelScreen->deviceID))
+      ok = i945_miptree_layout(intel, mt);
+   else
+      ok = i915_miptree_layout(intel, mt);
+#else
+   ok = brw_miptree_layout(intel, mt);
+#endif
 
    if (ok) {
-      if (!mt->compressed) {
-	 int align;
-
-	 if (intel->ttm) {
-	    /* XXX: Align pitch to multiple of 64 bytes for now to allow
-	     * render-to-texture to work in all cases. This should probably be
-	     * replaced at some point by some scheme to only do this when really
-	     * necessary.
-	     */
-	    align = 63;
-	 } else {
-	    align = 3;
-	 }
-
-	 mt->pitch = (mt->pitch * cpp + align) & ~align;
-
-	 /* XXX: At least the i915 seems very upset when the pitch is a multiple
-	  * of 1024 and sometimes 512 bytes - performance can drop by several
-	  * times. Go to the next multiple of the required alignment for now.
-	  */
-	 if (!(mt->pitch & 511))
-	    mt->pitch += align + 1;
-
-	 mt->pitch /= cpp;
-      }
+      assert (mt->pitch);
 
       mt->region = intel_region_alloc(intel,
                                       mt->cpp, mt->pitch, mt->total_height);
@@ -136,6 +104,52 @@ intel_miptree_create(struct intel_context *intel,
    return mt;
 }
 
+/**
+ * intel_miptree_pitch_align:
+ *
+ * @intel: intel context pointer; its ttm flag selects the byte alignment
+ *
+ * @mt: the miptree whose cpp and compressed fields drive the computation
+ *
+ * @pitch: the natural pitch value, in units of @mt->cpp bytes
+ *
+ * Returns @pitch rounded up (in bytes) to the alignment required by the
+ * device and converted back; compressed trees get @pitch back unchanged.
+ */
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch)
+{
+   int align_bytes;
+   int row_bytes;
+
+   /* Compressed trees keep the pitch they were given. */
+   if (mt->compressed)
+      return pitch;
+
+   /* XXX: Align pitch to multiple of 64 bytes under TTM for now to allow
+    * render-to-texture to work in all cases. This should probably be
+    * replaced at some point by some scheme to only do this when really
+    * necessary.  Without TTM a 4 byte multiple is used.
+    */
+   align_bytes = intel->ttm ? 64 : 4;
+
+   /* Do the rounding in bytes rather than in cpp-sized units. */
+   row_bytes = ALIGN(pitch * mt->cpp, align_bytes);
+
+#ifdef I915
+   /* XXX: At least the i915 seems very upset when the pitch is a multiple
+    * of 1024 and sometimes 512 bytes - performance can drop by several
+    * times. Go to the next multiple of the required alignment for now.
+    */
+   if ((row_bytes & 511) == 0)
+      row_bytes += align_bytes;
+#endif
+
+   /* Back to the caller's units. */
+   return row_bytes / mt->cpp;
+}
 
 void
 intel_miptree_reference(struct intel_mipmap_tree **dst,
@@ -207,11 +221,11 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
 
 void
 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-                             GLuint level,
-                             GLuint nr_images,
-                             GLuint x, GLuint y, GLuint w, GLuint h, GLuint d)
+			     GLuint level,
+			     GLuint nr_images,
+			     GLuint x, GLuint y,
+			     GLuint w, GLuint h, GLuint d)
 {
-
    mt->level[level].width = w;
    mt->level[level].height = h;
    mt->level[level].depth = d;
@@ -238,7 +252,8 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 
 void
 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-                               GLuint level, GLuint img, GLuint x, GLuint y)
+			       GLuint level, GLuint img,
+			       GLuint x, GLuint y)
 {
    if (img == 0 && level == 0)
       assert(x == 0 && y == 0);
@@ -271,12 +286,12 @@ intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level)
 
 
 GLuint
-intel_miptree_image_offset(struct intel_mipmap_tree * mt,
-                           GLuint face, GLuint level)
+intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+			   GLuint face, GLuint level)
 {
    if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
       return (mt->level[level].level_offset +
-              mt->level[level].image_offset[face] * mt->cpp);
+	      mt->level[level].image_offset[face] * mt->cpp);
    else
       return mt->level[level].level_offset;
 }
@@ -323,11 +338,12 @@ intel_miptree_image_unmap(struct intel_context *intel,
  */
 void
 intel_miptree_image_data(struct intel_context *intel,
-                         struct intel_mipmap_tree *dst,
-                         GLuint face,
-                         GLuint level,
-                         void *src,
-                         GLuint src_row_pitch, GLuint src_image_pitch)
+			 struct intel_mipmap_tree *dst,
+			 GLuint face,
+			 GLuint level,
+			 void *src,
+			 GLuint src_row_pitch,
+			 GLuint src_image_pitch)
 {
    GLuint depth = dst->level[level].depth;
    GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
@@ -335,18 +351,19 @@ intel_miptree_image_data(struct intel_context *intel,
    GLuint i;
    GLuint height = 0;
 
-   DBG("%s\n", __FUNCTION__);
+   DBG("%s: %d/%d\n", __FUNCTION__, face, level);
    for (i = 0; i < depth; i++) {
       height = dst->level[level].height;
       if(dst->compressed)
 	 height /= 4;
-      intel_region_data(intel, dst->region,
-                        dst_offset + dst_depth_offset[i], /* dst_offset */
-                        0, 0,                             /* dstx, dsty */
-                        src,
-                        src_row_pitch,
-                        0, 0,                             /* source x, y */
-                        dst->level[level].width, height); /* width, height */
+      intel_region_data(intel,
+			dst->region,
+			dst_offset + dst_depth_offset[i], /* dst_offset */
+			0, 0,                             /* dstx, dsty */
+			src,
+			src_row_pitch,
+			0, 0,                             /* source x, y */
+			dst->level[level].width, height); /* width, height */
 
       src += src_image_pitch * dst->cpp;
    }
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index ecdb7be244..968eec4fec 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -123,6 +123,10 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLuint cpp,
                                                GLuint compress_byte);
 
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch);
+
 void intel_miptree_reference(struct intel_mipmap_tree **dst,
                              struct intel_mipmap_tree *src);
 
@@ -190,9 +194,11 @@ void intel_miptree_image_copy(struct intel_context *intel,
 
 /* i915_mipmap_tree.c:
  */
-GLboolean i915_miptree_layout(struct intel_mipmap_tree *mt);
-GLboolean i945_miptree_layout(struct intel_mipmap_tree *mt);
-
-
+GLboolean i915_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean i945_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
new file mode 100644
index 0000000000..9018e3daef
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -0,0 +1,120 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "enums.h"
+#include "state.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+
+
+/**
+ * Report whether a plain blit is acceptable for glDraw/CopyPixels, i.e.
+ * no fragment operation that would affect the result is in effect.
+ */
+GLboolean
+intel_check_blit_fragment_ops(GLcontext * ctx)
+{
+   GLboolean color_masked, per_fragment, shaded;
+
+   /* Let Mesa process pending NewState before reading the fields below. */
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   color_masked = !(ctx->Color.ColorMask[0] && ctx->Color.ColorMask[1] &&
+                    ctx->Color.ColorMask[2] && ctx->Color.ColorMask[3]);
+
+   /* XXX Note: Scissor could be done with the blitter:
+    */
+   per_fragment = (ctx->Color.AlphaEnabled || ctx->Depth.Test ||
+                   ctx->Scissor.Enabled || ctx->Stencil.Enabled ||
+                   ctx->Color.BlendEnabled);
+   shaded = (ctx->_ImageTransferState || ctx->Fog.Enabled ||
+             ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
+
+   return !(color_masked || per_fragment || shaded);
+}
+
+
+GLboolean
+intel_check_meta_tex_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Not done yet; some of _ImageTransferState (scale, bias) could be
+    * done with fragment programs on i915. */
+   if (ctx->_ImageTransferState || ctx->Fog.Enabled)
+      return GL_FALSE;
+   return !(ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
+}
+
+/* Decide whether client pixels described by format/type can be blitted
+ * straight to or from the region.  The intel_region struct doesn't
+ * capture its pixel format, so the region is assumed to be a display
+ * surface: ARGB8888 when cpp is 4, RGB565 when cpp is 2.
+ * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd
+ * know the buffer's pixel format.
+ *
+ * \param format  as given to glDraw/ReadPixels
+ * \param type  as given to glDraw/ReadPixels
+ */
+GLboolean
+intel_check_blit_format(struct intel_region * region,
+                        GLenum format, GLenum type)
+{
+   GLboolean ok;
+
+   if (region->cpp == 4)
+      ok = (format == GL_BGRA &&
+            (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV));
+   else if (region->cpp == 2)
+      ok = (format == GL_BGR && type == GL_UNSIGNED_SHORT_5_6_5_REV);
+   else
+      ok = GL_FALSE;
+
+   if (!ok && (INTEL_DEBUG & DEBUG_PIXEL))
+      fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n",
+              __FUNCTION__, region->cpp,
+              _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+
+   return ok;
+}
+
+
+/* Fill in the pixel-path slots of the driver function table. */
+void intelInitPixelFuncs(struct dd_function_table *functions)
+{
+   functions->DrawPixels = intelDrawPixels;
+   functions->ReadPixels = intelReadPixels;
+   functions->CopyPixels = intelCopyPixels;
+   functions->Bitmap = _swrast_Bitmap;
+   functions->Accum = _swrast_Accum;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
new file mode 100644
index 0000000000..a6fcf90ce0
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_PIXEL_H
+#define INTEL_PIXEL_H
+
+#include "mtypes.h"
+
+void intelInitPixelFuncs(struct dd_function_table *functions);
+
+GLboolean intel_check_blit_fragment_ops(GLcontext * ctx);
+
+GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx);
+
+GLboolean intel_check_blit_format(struct intel_region *region,
+                                  GLenum format, GLenum type);
+
+
+void intelReadPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format, GLenum type,
+                     const struct gl_pixelstore_attrib *pack,
+                     GLvoid * pixels);
+
+void intelDrawPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format,
+                     GLenum type,
+                     const struct gl_pixelstore_attrib *unpack,
+                     const GLvoid * pixels);
+
+void intelCopyPixels(GLcontext * ctx,
+                     GLint srcx, GLint srcy,
+                     GLsizei width, GLsizei height,
+                     GLint destx, GLint desty, GLenum type);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
new file mode 100644
index 0000000000..3777422619
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -0,0 +1,357 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "colormac.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+
+
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+
+/* Unlike the other intel_pixel_* functions, the expectation here is
+ * that the incoming data is not in a PBO.  With the XY_TEXT blit
+ * method, there's no benefit having it in a PBO, but we could
+ * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
+ * PBO bitmaps.  I think they are probably pretty rare though - I
+ * wonder if Xgl uses them?  Returns NULL after raising a GL error on
+ * failure, else the mapped buffer address offset by 'bitmap'. */
+static const GLubyte *map_pbo( GLcontext *ctx,
+			       GLsizei width, GLsizei height,
+			       const struct gl_pixelstore_attrib *unpack,
+			       const GLubyte *bitmap )
+{
+   GLubyte *buf;
+   /* Validate the requested access first; 'bitmap' is passed as the offset. */
+   if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+				  GL_COLOR_INDEX, GL_BITMAP,
+				  (GLvoid *) bitmap)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+      return NULL;
+   }
+   /* Map read-only; the caller is responsible for the matching UnmapBuffer. */
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+					   GL_READ_ONLY_ARB,
+					   unpack->BufferObj);
+   if (!buf) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+      return NULL;
+   }
+
+   return ADD_POINTERS(buf, bitmap);
+}
+
+/* Non-zero when bit number 'bit' (bit 0 = LSB of src[0]) is set. */
+static GLboolean test_bit( const GLubyte *src, GLuint bit )
+{
+   return ((src[bit >> 3] >> (bit & 7)) & 1) != 0;
+}
+
+/* Turn on bit number 'bit' (bit 0 = LSB of dest[0]). */
+static void set_bit( GLubyte *dest, GLuint bit )
+{
+   dest[bit >> 3] |= 1 << (bit & 7);
+}
+
+/* Extract a w x h rectangle at (x, y) of the bitmap into dest, one output
+ * bit per pixel, and return the number of set bits.  Called per-cliprect;
+ * rows are walked from h-1 down to 0 when invert is set. */
+static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
+			      const struct gl_pixelstore_attrib *unpack,
+			      const GLubyte *bitmap,
+			      GLuint x, GLuint y, 
+			      GLuint w, GLuint h,
+			      GLubyte *dest,
+			      GLuint row_align,
+			      GLboolean invert)
+{
+   GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
+   GLuint mask = unpack->LsbFirst ? 0 : 7;   /* ^7 reverses bit order in a byte */
+   GLuint bit = 0;                           /* running output bit position */
+   GLint row, col;
+   GLint first, last;
+   GLint incr;
+   GLuint count = 0;                         /* number of bits set in dest */
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
+
+   if (invert) {
+      first = h-1;
+      last = 0;
+      incr = -1;
+   }
+   else {
+      first = 0;
+      last = h-1;
+      incr = 1;
+   }
+
+   /* Require that dest be pre-zero'd: bits are only ever OR-ed in.
+    */
+   for (row = first; row != (last+incr); row += incr) {
+      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, 
+						    width, height, 
+						    GL_COLOR_INDEX, GL_BITMAP, 
+						    y + row, x);
+
+      for (col = 0; col < w; col++, bit++) {
+	 if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
+	    set_bit(dest, bit ^ 7);   /* output is written MSB-first per byte */
+	    count++;
+	 }
+      }
+
+      if (row_align)
+	 bit = ALIGN(bit, row_align);   /* pad each output row to row_align bits */
+   }
+
+   return count;
+}
+
+
+
+
+/* Render a bitmap with immediate colour-expand blits, one per <=32x32 chunk
+ * of each cliprect.  Returns GL_FALSE when the caller must fall back, GL_TRUE
+ * when finished (including after a GL error has been raised for a bad PBO). */
+static GLboolean
+do_blit_bitmap( GLcontext *ctx, 
+		GLint dstx, GLint dsty,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   GLfloat tmpColor[4];
+
+   union {
+      GLuint ui;
+      GLubyte ub[4];
+   } color;
+
+   if (!dst)
+       return GL_FALSE;
+
+   /* Decide on the fallback before mapping a PBO, so that returning
+    * GL_FALSE never leaves it mapped.  Does zoom apply to bitmaps? */
+   if (!intel_check_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || 
+       ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   if (unpack->BufferObj->Name) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+	 return GL_TRUE;	/* even though this is an error, we're done */
+   }
+
+   COPY_4V(tmpColor, ctx->Current.RasterColor);
+
+   if (NEED_SECONDARY_COLOR(ctx)) {
+       ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
+   }
+
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint srcx = 0, srcy = 0;
+      GLint orig_screen_x1, orig_screen_y2;
+      GLuint i;
+
+      /* Remember the unclipped screen position of the bitmap's left/bottom. */
+      orig_screen_x1 = dPriv->x + dstx;
+      orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
+
+      /* Do scissoring in GL coordinates.  NOTE(review): unreachable while
+       * intel_check_blit_fragment_ops() rejects Scissor.Enabled. */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->y + (dPriv->h - dsty - height);  
+      dstx = dPriv->x + dstx;
+
+      dest_rect.x1 = dstx < 0 ? 0 : dstx;
+      dest_rect.y1 = dsty < 0 ? 0 : dsty;
+      dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width;
+      dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height;
+
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+	 int box_w, box_h;
+	 GLint px, py;
+	 GLuint stipple[32];  
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+	 /* Now go back to GL coordinates to figure out what subset of
+	  * the bitmap we are uploading for this cliprect:
+	  */
+	 box_w = rect.x2 - rect.x1;
+	 box_h = rect.y2 - rect.y1;
+	 srcx = rect.x1 - orig_screen_x1;
+	 srcy = orig_screen_y2 - rect.y2;
+
+
+#define DY 32
+#define DX 32
+
+	 /* Then, finally, chop it all into chunks that can be
+	  * digested by hardware:
+	  */
+	 for (py = 0; py < box_h; py += DY) { 
+	    for (px = 0; px < box_w; px += DX) { 
+	       int h = MIN2(DY, box_h - py);
+	       int w = MIN2(DX, box_w - px); 
+	       GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
+	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
+		  ctx->Color.LogicOp : GL_COPY;
+
+	       assert(sz <= sizeof(stipple));
+	       memset(stipple, 0, sz);
+
+	       /* May need to adjust this when padding has been introduced in
+		* sz above.  Chunks with no set bits are skipped:
+		*/
+	       if (get_bitmap_rect(width, height, unpack, 
+				   bitmap,
+				   srcx + px, srcy + py, w, h,
+				   (GLubyte *)stipple,
+				   8,
+				   GL_TRUE) == 0)
+		  continue;
+
+	       /* Emit the chunk's 1bpp data with the raster colour at its
+		* position inside this cliprect. */
+	       intelEmitImmediateColorExpandBlit( intel,
+						  dst->cpp,
+						  (GLubyte *)stipple, 
+						  sz,
+						  color.ui,
+						  dst->pitch,
+						  dst->buffer,
+						  0,
+						  dst->tiled,
+						  rect.x1 + px,
+						  rect.y2 - (py + h),
+						  w, h,
+						  logic_op);
+	    } 
+	 } 
+      }
+      intel->need_flush = GL_TRUE;
+   out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+
+   if (unpack->BufferObj->Name) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+
+   return GL_TRUE;
+}
+
+
+
+
+
+/* There are a large number of possible ways to implement bitmap on
+ * this hardware, most of them have some sort of drawback.  Here are a
+ * few that spring to mind:
+ * 
+ * Blit:
+ *    - XY_MONO_SRC_BLT_CMD
+ *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
+ *    - XY_TEXT_BLT
+ *    - XY_TEXT_IMMEDIATE_BLT
+ *         - blit per cliprect, subject to maximum immediate data size.
+ *    - XY_COLOR_BLT 
+ *         - per pixel or run of pixels
+ *    - XY_PIXEL_BLT
+ *         - good for sparse bitmaps
+ *
+ * 3D engine:
+ *    - Point per pixel
+ *    - Translate bitmap to an alpha texture and render as a quad
+ *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
+ */
+void
+intelBitmap(GLcontext * ctx,
+	    GLint x, GLint y,
+	    GLsizei width, GLsizei height,
+	    const struct gl_pixelstore_attrib *unpack,
+	    const GLubyte * pixels)
+{
+   /* Try the blitter first; swrast handles whatever it declines. */
+   const GLboolean blitted =
+      do_blit_bitmap(ctx, x, y, width, height, unpack, pixels);
+   if (!blitted) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+      _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
new file mode 100644
index 0000000000..c453097e55
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -0,0 +1,382 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "state.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_tris.h"
+#include "intel_pixel.h"
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+/* Pick the region that glCopyPixels of the given type reads from. */
+static struct intel_region *
+copypix_src_region(struct intel_context *intel, GLenum type)
+{
+   struct intel_region *depth = intel->depth_region;
+
+   if (type == GL_COLOR)
+      return intel_readbuf_region(intel);
+
+   /* Does it matter whether it is stencil/depth or depth/stencil?
+    */
+   if (type == GL_DEPTH_STENCIL_EXT)
+      return depth;
+
+   /* Don't think a pure depth copy is really possible except at 16bpp,
+    * when we have no stencil.
+    */
+   if (type == GL_DEPTH && depth && depth->cpp == 2)
+      return depth;
+
+   /* GL_STENCIL (don't think this is really possible), a depth buffer
+    * that is not 16bpp, and any other type have no usable source.
+    */
+   return NULL;
+}
+
+
+/**
+ * Report whether glCopyPixels may be done as a plain blit, i.e. no
+ * fragment operation that would affect the result is in effect.  Unlike
+ * intel_check_blit_fragment_ops, an enabled Scissor is allowed.
+ */
+static GLboolean
+intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Any channel masked off means the copy is not a straight one. */
+   if (!(ctx->Color.ColorMask[0] && ctx->Color.ColorMask[1] &&
+         ctx->Color.ColorMask[2] && ctx->Color.ColorMask[3]))
+      return GL_FALSE;
+   if (ctx->_ImageTransferState || ctx->Fog.Enabled ||
+       ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled)
+      return GL_FALSE;
+
+   /* Could do logicop with the blitter:
+    */
+   if (ctx->Color.AlphaEnabled || ctx->Depth.Test ||
+       ctx->Stencil.Enabled || ctx->Color.BlendEnabled)
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+/* CopyPixels by drawing a quad textured with the source region (GL_COLOR
+ * only).  Doesn't work for overlapping regions; those return GL_FALSE so
+ * the caller falls back.  Could do a double copy instead. */
+static GLboolean
+do_texture_copypixels(GLcontext * ctx,
+                      GLint srcx, GLint srcy,
+                      GLsizei width, GLsizei height,
+                      GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+   GLenum src_format;
+   GLenum src_type;
+
+   DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, 
+       srcx, srcy, width, height, dstx, dsty);
+
+   if (!src || !dst || type != GL_COLOR)
+      return GL_FALSE;
+
+   /* Can't handle overlapping regions.  Don't have sufficient control
+    * over rasterization to pull it off in-place.  Punt on these for
+    * now.
+    * 
+    * XXX: do a copy to a temporary. 
+    */
+   if (src->buffer == dst->buffer) {
+      drm_clip_rect_t srcbox;
+      drm_clip_rect_t dstbox;
+      drm_clip_rect_t tmp;
+
+      srcbox.x1 = srcx;
+      srcbox.y1 = srcy;
+      srcbox.x2 = srcx + width;
+      srcbox.y2 = srcy + height;
+
+      dstbox.x1 = dstx;
+      dstbox.y1 = dsty;
+      dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
+      dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
+
+      DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
+      DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
+	  width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+
+      if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
+         DBG("%s: regions overlap\n", __FUNCTION__);
+         return GL_FALSE;
+      }
+   }
+
+   intelFlush(&intel->ctx);
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   if (src->cpp == 2) {
+      src_format = GL_RGB;
+      src_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+   else {
+      src_format = GL_BGRA;
+      src_type = GL_UNSIGNED_BYTE;
+   }
+
+   /* Set the source region up as a large rectangular texture.
+    */
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
+                                         src->pitch,
+                                         src->height, src_format, src_type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+
+      srcy = dPriv->h - srcy - height;  /* convert from gl to hardware coords */
+
+      srcx += dPriv->x;
+      srcy += dPriv->y;
+
+      /* Clip against the source region.  This is the only source
+       * clipping we do.  XXX: Just set the texcoord wrap mode to clamp
+       * or similar.
+       *
+       */
+      if (0) {
+         GLint orig_x = srcx;
+         GLint orig_y = srcy;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx += srcx - orig_x;
+         dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
+      }
+
+      /* Just use the regular cliprect mechanism...  Does this need to
+       * even hold the lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+				 dstx,
+				 dstx + width * ctx->Pixel.ZoomX,
+				 dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
+				 dPriv->h - (dsty), 0, /* XXX: what z value? */
+				 0x00ff00ff,
+				 srcx, srcx + width, srcy, srcy + height);
+      /* NOTE(review): meta state is only left on this cliprect path - confirm. */
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+
+
+
+/**
+ * CopyPixels with the blitter.  Zoom, pixel transfer etc. are not supported:
+ * returns GL_FALSE then (or when src/dst is missing), GL_TRUE otherwise. */
+static GLboolean
+do_blit_copypixels(GLcontext * ctx,
+                   GLint srcx, GLint srcy,
+                   GLsizei width, GLsizei height,
+                   GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+
+   /* Copypixels can be more than a straight copy.  Ensure all the
+    * extra operations are disabled:
+    */
+   if (!intel_check_copypixel_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   if (!src || !dst)
+      return GL_FALSE;
+
+   /* From here on the copy is handled on this path and GL_TRUE is returned. */
+
+   intelFlush(&intel->ctx);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint delta_x = 0;
+      GLint delta_y = 0;
+      GLuint i;
+
+      /* Do scissoring in GL coordinates:
+       */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+	 GLint dx = dstx - srcx;
+         GLint dy = dsty - srcy;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+	 
+         srcx = dstx - dx;
+         srcy = dsty - dy;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->h - dsty - height;  
+      srcy = dPriv->h - srcy - height;  
+      dstx += dPriv->x;
+      dsty += dPriv->y;
+      srcx += dPriv->x;
+      srcy += dPriv->y;
+
+      /* Clip against the source region.  This is the only source clipping we
+       * do.  Dst is clipped with cliprects below.  NOTE(review): the scissor
+       * call passes x+w-1 / y+h-1 but this one pitch / height - confirm. */
+      {
+         delta_x = srcx - dstx;
+         delta_y = srcy - dsty;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx = srcx - delta_x;
+         dsty = srcy - delta_y;
+      }
+
+      dest_rect.x1 = dstx;
+      dest_rect.y1 = dsty;
+      dest_rect.x2 = dstx + width;
+      dest_rect.y2 = dsty + height;
+
+      /* Could do slightly more clipping: Eg, take the intersection of
+       * the existing set of cliprects and those cliprects translated
+       * by delta_x, delta_y:
+       * 
+       * This code will not overwrite other windows, but will
+       * introduce garbage when copying from obscured window regions.
+       */
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+         /* One blit per visible piece; the source is that piece shifted by delta. */
+         intelEmitCopyBlit(intel, dst->cpp, 
+			   src->pitch, src->buffer, 0, src->tiled,
+			   dst->pitch, dst->buffer, 0, dst->tiled,
+			   rect.x1 + delta_x, 
+			   rect.y1 + delta_y,       /* srcx, srcy */
+                           rect.x1, rect.y1,    /* dstx, dsty */
+                           rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   ctx->Color.ColorLogicOpEnabled ?
+			   ctx->Color.LogicOp : GL_COPY);
+      }
+
+    out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+void
+intelCopyPixels(GLcontext * ctx,
+                GLint srcx, GLint srcy,
+                GLsizei width, GLsizei height,
+                GLint destx, GLint desty, GLenum type)
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Hardware paths in order: blitter, then the textured quad; the
+    * short-circuit || stops at the first one that reports success.
+    */
+   if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type) ||
+       do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
+      return;
+
+   DBG("fallback to _swrast_CopyPixels\n");
+   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
new file mode 100644
index 0000000000..566f884be0
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -0,0 +1,386 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_buffer_objects.h"
+#include "intel_tris.h"
+
+/* DrawPixels through the 3D engine: the unpack PBO is set up as a
+ * rectangular texture and drawn as one quad under meta state.  Returns
+ * GL_FALSE to let the caller try another path, GL_TRUE once handled.
+ */
+static GLboolean
+do_texture_drawpixels(GLcontext * ctx,
+                      GLint x, GLint y,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *unpack,
+                      const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint rowLength = unpack->RowLength ? unpack->RowLength : width;
+   GLuint src_offset;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   intelFlush(&intel->ctx);
+   intel->vtbl.render_start(intel);
+   intel->vtbl.emit_state(intel);
+
+   if (!dst)
+      return GL_FALSE;
+
+   if (src) {
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now: */
+      return GL_FALSE;
+   }
+
+   /* There are a couple of things we can't do yet, one of which is
+    * set the correct state for pixel operations when GL texturing is
+    * enabled.  That's a pretty rare state and probably not worth the
+    * effort.  A completely device-independent version of this may do
+    * more.
+    *
+    * Similarly, we make no attempt to merge metaops processing with
+    * an enabled fragment program, though it would certainly be
+    * possible.
+    */
+   if (!intel_check_meta_tex_fragment_ops(ctx)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for metaops texture\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   /* Setup the pbo up as a rectangular texture, if possible.
+    *
+    * TODO: This is almost always possible if the i915 fragment
+    * program is adjusted to correctly swizzle the sampled colors.
+    * The major exception is any 24bit texture, like RGB888, for which
+    * there is no hardware support.  
+    */
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset,
+                                         rowLength, height, format, type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      GLint srcx, srcy;
+      GLint dstx, dsty;
+
+      dstx = x;
+      dsty = dPriv->h - (y + height);
+
+      srcx = 0;                 /* skiprows/pixels already done */
+      srcy = 0;
+
+      if (0) {
+         const GLint orig_x = dstx;
+         const GLint orig_y = dsty;
+
+         if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height,
+                                   &dstx, &dsty, &width, &height))
+            goto out;
+
+         srcx += dstx - orig_x;
+         srcy += dsty - orig_y;
+      }
+
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height);
+
+      /* Must use the regular cliprect mechanism in order to get the
+       * drawing origin set correctly.  Otherwise scissor state is in
+       * incorrect coordinate space.  Does this even need to hold the
+       * lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+                                 dstx, dstx + width * ctx->Pixel.ZoomX,
+                                 dPriv->h - (y + height * ctx->Pixel.ZoomY),
+                                 dPriv->h - (y),
+                                 -ctx->Current.RasterPos[2] * .5,
+                                 0x00ff00ff,
+                                 srcx, srcx + width, srcy + height, srcy);
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   else {
+      /* Nothing visible to draw, but meta state must still be undone. */
+      intel->vtbl.leave_meta_state(intel);
+   }
+   UNLOCK_HARDWARE(intel);
+   return GL_TRUE;
+}
+
+
+
+
+
+/* Pros:  
+ *   - no waiting for idle before updating framebuffer.
+ *   
+ * Cons:
+ *   - if upload is by memcpy, this may actually be slower than fallback path.
+ *   - uploads the whole image even if destination is clipped
+ *   
+ * Need to benchmark.
+ *
+ * Given the questions about performance, implement for pbo's only.
+ * This path is definitely a win if the pbo is already in agp.  If it
+ * turns out otherwise, we can add the code necessary to upload client
+ * data to agp space before performing the blit.  (Though it may turn
+ * out to be better/simpler just to use the texture engine).
+ */
+static GLboolean
+do_blit_drawpixels(GLcontext * ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dest = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset;
+   GLint rowLength;
+   dri_fence *fence = NULL;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   if (!dest) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - no dest\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (src) {
+      /* This validation should be done by core mesa:
+       */
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now:
+       */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - not PBO\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_format(dest, format, type)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_fragment_ops(ctx)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for blitter\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (ctx->Pixel.ZoomX != 1.0F) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Source pitch in pixels; kept signed because it is negated below. */
+   rowLength = (unpack->RowLength > 0) ? unpack->RowLength : width;
+
+   if (ctx->Pixel.ZoomY == -1.0F) {
+      /* Not implemented yet; would also need "y -= height" here. */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+   else if (ctx->Pixel.ZoomY == 1.0F) {
+      /* NOTE(review): negative pitch presumably makes the blitter walk the
+       * source rows backwards to flip the bottom-up GL image -- confirm.
+       */
+      rowLength = -rowLength;
+   }
+   else {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t rect;
+      drm_clip_rect_t dest_rect;
+      dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
+      int i;
+
+      dest_rect.x1 = dPriv->x + x;
+      dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
+      dest_rect.x2 = dest_rect.x1 + width;
+      dest_rect.y2 = dest_rect.y1 + height;
+
+      for (i = 0; i < nbox; i++) {
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+         intelEmitCopyBlit(intel,
+                           dest->cpp,
+                           rowLength, src_buffer, src_offset, GL_FALSE,
+                           dest->pitch, dest->buffer, 0, dest->tiled,
+                           rect.x1 - dest_rect.x1,
+                           rect.y2 - dest_rect.y2,
+                           rect.x1,
+                           rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1,
+                           ctx->Color.ColorLogicOpEnabled ?
+                           ctx->Color.LogicOp : GL_COPY);
+      }
+      intel_batchbuffer_flush(intel->batch);
+      fence = intel->batch->last_fence;
+      dri_fence_reference(fence);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   /* Wait on the fence taken after the batch flush before returning. */
+   if (fence) {
+      dri_fence_wait(fence);
+      dri_fence_unreference(fence);
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s - DONE\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+
+
+void
+intelDrawPixels(GLcontext * ctx,
+                GLint x, GLint y,
+                GLsizei width, GLsizei height,
+                GLenum format,
+                GLenum type,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLvoid * pixels)
+{
+   struct gl_fragment_program *saved_fp;
+   GLboolean texenv_prog_bound;
+
+   /* Accelerated paths first: blit, then textured quad.  A true result
+    * from either one means there is nothing left to do here. */
+   if (do_blit_drawpixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels) ||
+       do_texture_drawpixels(ctx, x, y, width, height, format, type,
+                             unpack, pixels))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   /* Keep the i915 texenv program off DrawPixels: purely an optimization,
+    * mesa would otherwise happily run it on every pixel of the image.
+    * Old note: can't just zero the current program, a buffer resize
+    * triggers state checks that segfault.  Remains a hack. */
+   saved_fp = ctx->FragmentProgram._Current;
+   texenv_prog_bound = (saved_fp == ctx->FragmentProgram._TexEnvProgram);
+
+   if (texenv_prog_bound) {
+      ctx->FragmentProgram._Current = NULL;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE;
+      ctx->FragmentProgram._Active = GL_FALSE;
+   }
+
+   _swrast_DrawPixels(ctx, x, y, width, height, format, type, unpack, pixels);
+
+   if (texenv_prog_bound) {
+      ctx->FragmentProgram._Current = saved_fp;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
+      ctx->FragmentProgram._Active = GL_TRUE;
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 9e885c3b3b..37629c07e2 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -61,6 +61,9 @@
 
 #define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)
 
+#define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
+# define XY_TEXT_BYTE_PACKED		(1 << 16)
+
 /* BR00 */
 #define XY_BLT_WRITE_ALPHA	(1 << 21)
 #define XY_BLT_WRITE_RGB	(1 << 20)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index a47b288090..a5de01a3a8 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -49,23 +49,6 @@
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
 
-void
-intel_region_idle(struct intel_context *intel, struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   /* XXX: Using this function is likely bogus -- it ought to only have been
-    * used before a map, anyway, but leave this cheap implementation of it
-    * for now.
-    */
-   if (region && region->buffer) {
-      /* Mapping it for read will ensure that any acceleration to the region
-       * would have landed already.
-       */
-      dri_bo_map(region->buffer, GL_TRUE);
-      dri_bo_unmap(region->buffer);
-   }
-}
-
 /* XXX: Thread safety?
  */
 GLubyte *
@@ -195,6 +178,8 @@ intel_region_data(struct intel_context *intel,
                   const void *src, GLuint src_pitch,
                   GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
+   GLboolean locked = GL_FALSE;
+
    DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
@@ -208,8 +193,10 @@ intel_region_data(struct intel_context *intel,
          intel_region_cow(intel, dst);
    }
 
-
-   LOCK_HARDWARE(intel);
+   if (!intel->locked) {
+      LOCK_HARDWARE(intel);
+      locked = GL_TRUE;
+   }
 
    _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
                    dst->cpp,
@@ -218,7 +205,8 @@ intel_region_data(struct intel_context *intel,
 
    intel_region_unmap(intel, dst);
 
-   UNLOCK_HARDWARE(intel);
+   if (locked)
+      UNLOCK_HARDWARE(intel);
 
 }
 
@@ -459,6 +447,7 @@ intel_recreate_static_regions(struct intel_context *intel)
 			    &intelScreen->back,
 			    DRM_BO_FLAG_MEM_TT);
 
+#ifdef I915
    if (intelScreen->third.handle) {
       intel->third_region =
 	 intel_recreate_static(intel, "third",
@@ -466,6 +455,7 @@ intel_recreate_static_regions(struct intel_context *intel)
 			       &intelScreen->third,
 			       DRM_BO_FLAG_MEM_TT);
    }
+#endif /* I915 */
 
    /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
     * private buffers.
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
index 1975d729e4..b6a3b5a739 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -73,9 +73,6 @@ void intel_region_release(struct intel_region **ib);
 
 void intel_recreate_static_regions(struct intel_context *intel);
 
-void intel_region_idle(struct intel_context *intel,
-		       struct intel_region *ib);
-
 /* Map/unmap regions.  This is refcounted also: 
  */
 GLubyte *intel_region_map(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index e1f62bd70e..cd72a4b122 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -41,10 +41,11 @@
 #include "intel_buffers.h"
 #include "intel_tex.h"
 #include "intel_span.h"
-#include "intel_tris.h"
 #include "intel_ioctl.h"
 #include "intel_fbo.h"
+#include "intel_chipset.h"
 
+#include "i915_drm.h"
 #include "i830_dri.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
@@ -571,9 +572,9 @@ extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis,
 extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
                                    __DRIcontextPrivate * driContextPriv,
                                    void *sharedContextPrivate);
-
-
-
+extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis,
+				  __DRIcontextPrivate * driContextPriv,
+				  void *sharedContextPrivate);
 
 static GLboolean
 intelCreateContext(const __GLcontextModes * mesaVis,
@@ -583,29 +584,21 @@ intelCreateContext(const __GLcontextModes * mesaVis,
    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
    intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
 
-   switch (intelScreen->deviceID) {
-      /* Don't deal with i830 until texture work complete:
-       */
-   case PCI_CHIP_845_G:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
+#ifdef I915
+   if (IS_9XX(intelScreen->deviceID)) {
+      if (!IS_965(intelScreen->deviceID)) {
+	 return i915CreateContext(mesaVis, driContextPriv,
+				  sharedContextPrivate);
+      }
+   } else {
       return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
-
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q35_G:
-   case PCI_CHIP_Q33_G:
-      return i915CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
-
-   default:
-      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
-      return GL_FALSE;
    }
+#else
+   if (IS_965(intelScreen->deviceID))
+      return brwCreateContext(mesaVis, driContextPriv, sharedContextPrivate);
+#endif
+   fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+   return GL_FALSE;
 }
 
 
@@ -625,7 +618,9 @@ static const struct __DriverAPIRec intelAPI = {
    .WaitForSBC = NULL,
    .SwapBuffersMSC = NULL,
    .CopySubBuffer = intelCopySubBuffer,
+#ifdef I915
    .setTexOffset = intelSetTexOffset,
+#endif
 };
 
 
@@ -723,7 +718,11 @@ intelFillInModes(unsigned pixel_bits, unsigned depth_bits,
  */
 PUBLIC __GLcontextModes *__driDriverInitScreen(__DRIscreenPrivate *psp)
 {
+#ifdef I915
    static const __DRIversion ddx_expected = { 1, 5, 0 };
+#else
+   static const __DRIversion ddx_expected = { 1, 6, 0 };
+#endif
    static const __DRIversion dri_expected = { 4, 0, 0 };
    static const __DRIversion drm_expected = { 1, 5, 0 };
    I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index b08dee43bc..f016b6b4dc 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -1,4 +1,6 @@
+#include "swrast/swrast.h"
 #include "texobj.h"
+#include "mipmap.h"
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
@@ -155,6 +157,46 @@ timed_memcpy(void *dest, const void *src, size_t n)
 }
 #endif /* DO_DEBUG */
 
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level).
+ *
+ * The texture object's miptree must be mapped.
+ *
+ * It would be really nice if this was just called by Mesa whenever mipmaps
+ * needed to be regenerated, rather than us having to remember to do so in
+ * each texture image modification path.
+ *
+ * This function should also include an accelerated path.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+                      const struct gl_texture_unit *texUnit,
+                      struct gl_texture_object *texObj)
+{
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   int face, i;
+
+   _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+
+   /* The new images bypassed the normal TexImage path, so record level and
+    * face in our private data.  MaxLevel is inclusive, and Image[] holds only
+    * MAX_TEXTURE_LEVELS entries per face, so bound the walk by both.
+    */
+   for (face = 0; face < nr_faces; face++) {
+      for (i = texObj->BaseLevel + 1;
+           i <= texObj->MaxLevel && i < MAX_TEXTURE_LEVELS; i++) {
+         struct intel_texture_image *intelImage =
+            intel_texture_image(texObj->Image[face][i]);
+         if (intelImage == NULL)
+            break;
+
+         intelImage->level = i;
+         intelImage->face = face;
+      }
+   }
+}
+
 
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
@@ -166,10 +208,17 @@ intelInitTextureFuncs(struct dd_function_table *functions)
    functions->TexSubImage1D = intelTexSubImage1D;
    functions->TexSubImage2D = intelTexSubImage2D;
    functions->TexSubImage3D = intelTexSubImage3D;
+#ifdef I915
    functions->CopyTexImage1D = intelCopyTexImage1D;
    functions->CopyTexImage2D = intelCopyTexImage2D;
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+#else
+   functions->CopyTexImage1D = _swrast_copy_teximage1d;
+   functions->CopyTexImage2D = _swrast_copy_teximage2d;
+   functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
+   functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
+#endif
    functions->GetTexImage = intelGetTexImage;
 
    /* compressed texture functions */
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index b77d7a1d8a..2973e0ceb9 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -148,4 +148,8 @@ void intel_tex_unmap_images(struct intel_context *intel,
 
 int intel_compressed_num_bytes(GLuint mesaFormat);
 
+void intel_generate_mipmap(GLcontext *ctx, GLenum target,
+			   const struct gl_texture_unit *texUnit,
+			   struct gl_texture_object *texObj);
+
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index f1a455a04c..521ce06640 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -29,6 +29,7 @@
 #include "enums.h"
 #include "image.h"
 #include "teximage.h"
+#include "mipmap.h"
 #include "swrast/swrast.h"
 
 #include "intel_screen.h"
@@ -85,12 +86,14 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat)
 
 static GLboolean
 do_copy_texsubimage(struct intel_context *intel,
+		    GLenum target,
                     struct intel_texture_image *intelImage,
                     GLenum internalFormat,
                     GLint dstx, GLint dsty,
                     GLint x, GLint y, GLsizei width, GLsizei height)
 {
    GLcontext *ctx = &intel->ctx;
+   struct gl_texture_object *texObj = intelImage->base.TexObject;
    const struct intel_region *src =
       get_teximage_source(intel, internalFormat);
 
@@ -156,16 +159,12 @@ do_copy_texsubimage(struct intel_context *intel,
 
    UNLOCK_HARDWARE(intel);
 
-#if 0
-   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
-    * XXX Add a ctx->Driver.GenerateMipmaps() function?
-    */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+   /* GL_SGIS_generate_mipmap */
+   if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
       intel_generate_mipmap(ctx, target,
                             &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
                             texObj);
    }
-#endif
 
    return GL_TRUE;
 }
@@ -197,7 +196,7 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
                           GL_RGBA, CHAN_TYPE, NULL,
                           &ctx->DefaultPacking, texObj, texImage);
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, 0, 0, x, y, width, 1))
       goto fail;
@@ -234,7 +233,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
                           &ctx->DefaultPacking, texObj, texImage);
 
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, 0, 0, x, y, width, height))
       goto fail;
@@ -264,7 +263,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
    /* Need to check texture is compatible with source format. 
     */
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, xoffset, 0, x, y, width, 1)) {
       _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width);
@@ -290,7 +289,7 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
    /* Need to check texture is compatible with source format. 
     */
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat,
                             xoffset, yoffset, x, y, width, height)) {
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index fd800a7bb6..4f5f75d049 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -457,10 +457,6 @@ intelTexImage(GLcontext * ctx,
    if (!pixels)
       return;
 
-
-   if (intelImage->mt)
-      intel_region_idle(intel, intelImage->mt->region);
-
    LOCK_HARDWARE(intel);
 
    if (intelImage->mt) {
@@ -470,6 +466,7 @@ intelTexImage(GLcontext * ctx,
                                                intelImage->level,
                                                &dstRowStride,
                                                intelImage->base.ImageOffsets);
+      texImage->RowStride = dstRowStride / intelImage->mt->cpp;
    }
    else {
       /* Allocate regular memory and store the image there temporarily.   */
@@ -487,8 +484,8 @@ intelTexImage(GLcontext * ctx,
       texImage->Data = malloc(sizeInBytes);
    }
 
-   DBG("Upload image %dx%dx%d row_len %x "
-       "pitch %x\n",
+   DBG("Upload image %dx%dx%d row_len %d "
+       "pitch %d\n",
        width, height, depth, width * texelBytes, dstRowStride);
 
    /* Copy data.  Would like to know when it's ok for us to eg. use
@@ -508,6 +505,13 @@ intelTexImage(GLcontext * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
    }
 
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target,
+                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
+                            texObj);
+   }
+
    _mesa_unmap_teximage_pbo(ctx, unpack);
 
    if (intelImage->mt) {
@@ -516,16 +520,6 @@ intelTexImage(GLcontext * ctx,
    }
 
    UNLOCK_HARDWARE(intel);
-
-#if 0
-   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
-    */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      intel_generate_mipmap(ctx, target,
-                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
-                            texObj);
-   }
-#endif
 }
 
 void
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c
index 4da636021b..edc3a2eaa4 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c
@@ -52,7 +52,7 @@ GLuint intel_compressed_alignment(GLenum internalFormat)
     return alignment;
 }
 
-void i945_miptree_layout_2d( struct intel_mipmap_tree *mt )
+void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
 {
    GLint align_h = 2, align_w = 4;
    GLuint level;
@@ -92,7 +92,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt )
    /* Pitch must be a whole number of dwords, even though we
     * express it in texels.
     */
-   mt->pitch = ALIGN(mt->pitch * mt->cpp, 4) / mt->cpp;
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
    mt->total_height = 0;
 
    for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h
index 99d41c3629..193699d3f7 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h
@@ -38,5 +38,5 @@ static GLuint minify( GLuint d )
    return MAX2(1, d>>1);
 }
 
-extern void i945_miptree_layout_2d( struct intel_mipmap_tree *mt );
+extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
 extern GLuint intel_compressed_alignment(GLenum);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 32de2cfb48..bd27b86bf3 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -64,9 +64,6 @@ intelTexSubimage(GLcontext * ctx,
    if (!pixels)
       return;
 
-   if (intelImage->mt)
-      intel_region_idle(intel, intelImage->mt->region);
-
    LOCK_HARDWARE(intel);
 
    /* Map buffer if necessary.  Need to lock to prevent other contexts
@@ -93,14 +90,12 @@ intelTexSubimage(GLcontext * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
    }
 
-#if 0
    /* GL_SGIS_generate_mipmap */
    if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      _mesa_generate_mipmap(ctx, target,
+      intel_generate_mipmap(ctx, target,
                             &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
                             texObj);
    }
-#endif
 
    _mesa_unmap_teximage_pbo(ctx, packing);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index af18c26d55..d260a721d9 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -40,6 +40,7 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
          firstLevel = lastLevel = tObj->BaseLevel;
       }
       else {
+#ifdef I915
          firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
          firstLevel = MAX2(firstLevel, tObj->BaseLevel);
          lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
@@ -47,6 +48,18 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
          lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
          lastLevel = MIN2(lastLevel, tObj->MaxLevel);
          lastLevel = MAX2(firstLevel, lastLevel);       /* need at least one level */
+#else
+	 /* Currently not taking min/max lod into account here, those
+	  * values are programmed as sampler state elsewhere and we
+	  * upload the same mipmap levels regardless.  Not sure if
+	  * this makes sense as it means it isn't possible for the app
+	  * to use min/max lod to reduce texture memory pressure:
+	  */
+	 firstLevel = tObj->BaseLevel;
+	 lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
+			  tObj->MaxLevel);
+	 lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+#endif
       }
       break;
    case GL_TEXTURE_RECTANGLE_NV:
@@ -62,6 +75,10 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
    intelObj->lastLevel = lastLevel;
 }
 
+/**
+ * Copies the image's contents at its level into the object's miptree,
+ * and updates the image to point at the object's miptree.
+ */
 static void
 copy_image_data_to_tree(struct intel_context *intel,
                         struct intel_texture_object *intelObj,
@@ -211,8 +228,15 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
       }
    }
 
+#ifdef I915
+   /* XXX: what is this flush about?
+    * On 965, it causes a batch flush in the middle of the state relocation
+    * emits, which means that the eventual rendering doesn't have all of the
+    * required relocations in place.
+    */
    if (need_flush)
       intel_batchbuffer_flush(intel->batch);
+#endif
 
    return GL_TRUE;
 }