126 files changed, 3689 insertions, 7528 deletions
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c
index 757a237494..18b12c63f7 100644
--- a/src/mesa/drivers/dri/common/dri_bufmgr.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.c
@@ -154,3 +154,9 @@ void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
 {
    batch_buf->bufmgr->post_submit(batch_buf, last_fence);
 }
+
+void
+dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug)
+{
+   bufmgr->debug = enable_debug;
+}
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h
index d263ad279b..cdf27b903f 100644
--- a/src/mesa/drivers/dri/common/dri_bufmgr.h
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.h
@@ -172,6 +172,8 @@ struct _dri_bufmgr {
    void *(*process_relocs)(dri_bo *batch_buf, GLuint *count);
 
    void (*post_submit)(dri_bo *batch_buf, dri_fence **fence);
+
+   GLboolean debug; /**< Enables verbose debugging printouts */
 };
 
 dri_bo *dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size,
@@ -202,7 +204,7 @@ dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
 				 int (*fence_wait)(void *private,
 						   unsigned int cookie),
 				 void *driver_priv);
-void dri_bufmgr_fake_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug);
+void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug);
 void dri_bo_fake_disable_backing_store(dri_bo *bo,
 				       void (*invalidate_cb)(dri_bo *bo,
 							     void *ptr),
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
index 5cd96f0821..65b2c174d5 100644
--- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
@@ -42,7 +42,7 @@
 #include "imports.h"
 
 #define DBG(...) do {					\
-   if (bufmgr_fake->debug)				\
+   if (bufmgr_fake->bufmgr.debug)			\
       _mesa_printf(__VA_ARGS__);			\
 } while (0)
 
@@ -95,8 +95,6 @@ struct block {
 typedef struct _bufmgr_fake {
    dri_bufmgr bufmgr;
 
-   _glthread_Mutex mutex;	/**< for thread safety */
-
    unsigned long low_offset;
    unsigned long size;
    void *virtual;
@@ -545,32 +543,27 @@ void
 dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr)
 {
    dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr;
+   struct block *block, *tmp;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
-   {
-      struct block *block, *tmp;
+   bufmgr_fake->need_fence = 1;
+   bufmgr_fake->fail = 0;
 
-      bufmgr_fake->need_fence = 1;
-      bufmgr_fake->fail = 0;
-
-      /* Wait for hardware idle.  We don't know where acceleration has been
-       * happening, so we'll need to wait anyway before letting anything get
-       * put on the card again.
-       */
-      dri_bufmgr_fake_wait_idle(bufmgr_fake);
+   /* Wait for hardware idle.  We don't know where acceleration has been
+    * happening, so we'll need to wait anyway before letting anything get
+    * put on the card again.
+    */
+   dri_bufmgr_fake_wait_idle(bufmgr_fake);
 
-      /* Check that we hadn't released the lock without having fenced the last
-       * set of buffers.
-       */
-      assert(is_empty_list(&bufmgr_fake->fenced));
-      assert(is_empty_list(&bufmgr_fake->on_hardware));
+   /* Check that we hadn't released the lock without having fenced the last
+    * set of buffers.
+    */
+   assert(is_empty_list(&bufmgr_fake->fenced));
+   assert(is_empty_list(&bufmgr_fake->on_hardware));
 
-      foreach_s(block, tmp, &bufmgr_fake->lru) {
-	 assert(_fence_test(bufmgr_fake, block->fence));
-	 set_dirty(block->bo);
-      }
+   foreach_s(block, tmp, &bufmgr_fake->lru) {
+      assert(_fence_test(bufmgr_fake, block->fence));
+      set_dirty(block->bo);
    }
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 static dri_bo *
@@ -646,12 +639,9 @@ dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name,
 static void
 dri_fake_bo_reference(dri_bo *bo)
 {
-   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr;
    dri_bo_fake *bo_fake = (dri_bo_fake *)bo;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    bo_fake->refcount++;
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 static void
@@ -663,18 +653,16 @@ dri_fake_bo_unreference(dri_bo *bo)
    if (!bo)
       return;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    if (--bo_fake->refcount == 0) {
+      assert(bo_fake->map_count == 0);
       /* No remaining references, so free it */
       if (bo_fake->block)
 	 free_block(bufmgr_fake, bo_fake->block);
       free_backing_store(bo);
-      _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
-      free(bo);
       DBG("drm_bo_unreference: free %s\n", bo_fake->name);
+      free(bo); /* freed last: DBG above reads bo_fake->name */
       return;
    }
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 /**
@@ -689,8 +677,6 @@ void dri_bo_fake_disable_backing_store(dri_bo *bo,
    dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr;
    dri_bo_fake *bo_fake = (dri_bo_fake *)bo;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
-
    if (bo_fake->backing_store)
       free_backing_store(bo);
 
@@ -707,8 +693,6 @@ void dri_bo_fake_disable_backing_store(dri_bo *bo,
     */
    if (invalidate_cb != NULL)
       invalidate_cb(bo, ptr);
-
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 /**
@@ -725,12 +709,9 @@ dri_fake_bo_map(dri_bo *bo, GLboolean write_enable)
    if (bo_fake->is_static)
       return 0;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    /* Allow recursive mapping, which is used internally in relocation. */
-   if (bo_fake->map_count++ != 0) {
-      _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
+   if (bo_fake->map_count++ != 0)
       return 0;
-   }
 
    /* Clear the relocation cache if unknown data is going to be written in. */
    if (!bufmgr_fake->in_relocation && write_enable) {
@@ -750,7 +731,6 @@ dri_fake_bo_map(dri_bo *bo, GLboolean write_enable)
 	 if (!bo_fake->block && !evict_and_alloc_block(bo)) {
 	    DBG("%s: alloc failed\n", __FUNCTION__);
 	    bufmgr_fake->fail = 1;
-	    _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 	    return 1;
 	 }
 	 else {
@@ -773,7 +753,7 @@ dri_fake_bo_map(dri_bo *bo, GLboolean write_enable)
 	 bo->virtual = bo_fake->backing_store;
       }
    }
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
+
    return 0;
 }
 
@@ -787,20 +767,15 @@ dri_fake_bo_unmap(dri_bo *bo)
    if (bo_fake->is_static)
       return 0;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    assert(bo_fake->map_count != 0);
-   if (--bo_fake->map_count != 0) {
-      _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
+   if (--bo_fake->map_count != 0)
       return 0;
-   }
 
    DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);
 
    bo->virtual = NULL;
 
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
-
    return 0;
 }
 
@@ -818,58 +793,52 @@ dri_fake_bo_validate(dri_bo *bo, uint64_t flags)
    DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
-   {
-      /* Sanity check: Buffers should be unmapped before being validated.
-       * This is not so much of a problem for bufmgr_fake, but TTM refuses,
-       * and the problem is harder to debug there.
-       */
-      assert(bo_fake->map_count == 0);
+   /* Sanity check: Buffers should be unmapped before being validated.
+    * This is not so much of a problem for bufmgr_fake, but TTM refuses,
+    * and the problem is harder to debug there.
+    */
+   assert(bo_fake->map_count == 0);
 
-      if (bo_fake->is_static) {
-	 /* Add it to the needs-fence list */
-	 bufmgr_fake->need_fence = 1;
-	 _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
-	 return 0;
-      }
+   if (bo_fake->is_static) {
+      /* Add it to the needs-fence list */
+      bufmgr_fake->need_fence = 1;
+      return 0;
+   }
 
-      /* Allocate the card memory */
-      if (!bo_fake->block && !evict_and_alloc_block(bo)) {
-	 bufmgr_fake->fail = 1;
-	 _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
-	 DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name);
-	 return -1;
-      }
+   /* Allocate the card memory */
+   if (!bo_fake->block && !evict_and_alloc_block(bo)) {
+      bufmgr_fake->fail = 1;
+      DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name);
+      return -1;
+   }
 
-      assert(bo_fake->block);
-      assert(bo_fake->block->bo == &bo_fake->bo);
+   assert(bo_fake->block);
+   assert(bo_fake->block->bo == &bo_fake->bo);
 
-      bo->offset = bo_fake->block->mem->ofs;
+   bo->offset = bo_fake->block->mem->ofs;
 
-      /* Upload the buffer contents if necessary */
-      if (bo_fake->dirty) {
-	 DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id,
-	     bo_fake->name, bo->size, bo_fake->block->mem->ofs);
+   /* Upload the buffer contents if necessary */
+   if (bo_fake->dirty) {
+      DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id,
+	  bo_fake->name, bo->size, bo_fake->block->mem->ofs);
 
-	 assert(!(bo_fake->flags &
-		  (BM_NO_BACKING_STORE|BM_PINNED)));
+      assert(!(bo_fake->flags &
+	       (BM_NO_BACKING_STORE|BM_PINNED)));
 
-	 /* Actually, should be able to just wait for a fence on the memory,
-	  * which we would be tracking when we free it.  Waiting for idle is
-	  * a sufficiently large hammer for now.
-	  */
-	 dri_bufmgr_fake_wait_idle(bufmgr_fake);
+      /* Actually, should be able to just wait for a fence on the memory,
+       * which we would be tracking when we free it.  Waiting for idle is
+       * a sufficiently large hammer for now.
+       */
+      dri_bufmgr_fake_wait_idle(bufmgr_fake);
 
-	 memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size);
-	 bo_fake->dirty = 0;
-      }
+      memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size);
+      bo_fake->dirty = 0;
+   }
 
-      bo_fake->block->on_hardware = 1;
-      move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block);
+   bo_fake->block->on_hardware = 1;
+   move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block);
 
-      bufmgr_fake->need_fence = 1;
-   }
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
+   bufmgr_fake->need_fence = 1;
 
    return 0;
 }
@@ -891,11 +860,9 @@ dri_fake_fence_validated(dri_bufmgr *bufmgr, const char *name,
    fence_fake->flushed = flushed;
    fence_fake->fence.bufmgr = bufmgr;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    cookie = _fence_emit_internal(bufmgr_fake);
    fence_fake->fence_cookie = cookie;
    fence_blocks(bufmgr_fake, cookie);
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 
    DBG("drm_fence_validated: 0x%08x cookie\n", fence_fake->fence_cookie);
 
@@ -906,29 +873,22 @@ static void
 dri_fake_fence_reference(dri_fence *fence)
 {
    dri_fence_fake *fence_fake = (dri_fence_fake *)fence;
-   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)fence->bufmgr;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    ++fence_fake->refcount;
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 static void
 dri_fake_fence_unreference(dri_fence *fence)
 {
    dri_fence_fake *fence_fake = (dri_fence_fake *)fence;
-   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)fence->bufmgr;
 
    if (!fence)
       return;
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    if (--fence_fake->refcount == 0) {
-      _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
       free(fence);
       return;
    }
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 static void
@@ -939,9 +899,7 @@ dri_fake_fence_wait(dri_fence *fence)
 
    DBG("drm_fence_wait: 0x%08x cookie\n", fence_fake->fence_cookie);
 
-   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
    _fence_wait_internal(bufmgr_fake, fence_fake->fence_cookie);
-   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }
 
 static void
@@ -949,7 +907,6 @@ dri_fake_destroy(dri_bufmgr *bufmgr)
 {
    dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr;
 
-   _glthread_DESTROY_MUTEX(bufmgr_fake->mutex);
    mmDestroy(bufmgr_fake->heap);
    free(bufmgr);
 }
@@ -1150,14 +1107,6 @@ dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
    bufmgr_fake->nr_relocs = 0;
 }
 
-void
-dri_bufmgr_fake_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug)
-{
-   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr;
-
-   bufmgr_fake->debug = enable_debug;
-}
-
 dri_bufmgr *
 dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
 		     unsigned long size,
@@ -1179,8 +1128,6 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
    bufmgr_fake->size = size;
    bufmgr_fake->heap = mmInit(low_offset, size);
 
-   _glthread_INIT_MUTEX(bufmgr_fake->mutex);
-
    /* Hook in methods */
    bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc;
    bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static;
@@ -1195,6 +1142,8 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
    bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc;
    bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs;
    bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit;
+   bufmgr_fake->bufmgr.debug = GL_FALSE;
+
    bufmgr_fake->fence_emit = fence_emit;
    bufmgr_fake->fence_wait = fence_wait;
    bufmgr_fake->driver_priv = driver_priv;
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index b635894fe5..c313d71e80 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -403,40 +403,40 @@ static GLboolean checkValue (const driOptionValue *v, const driOptionInfo *info)
 /** \brief Output a warning message. */
 #define XML_WARNING1(msg) do {\
     __driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
-                      XML_GetCurrentLineNumber(data->parser), \
-                      XML_GetCurrentColumnNumber(data->parser)); \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser)); \
 } while (0)
 #define XML_WARNING(msg,args...) do { \
     __driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
-                      XML_GetCurrentLineNumber(data->parser), \
-                      XML_GetCurrentColumnNumber(data->parser), \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser), \
                       args); \
 } while (0)
 /** \brief Output an error message. */
 #define XML_ERROR1(msg) do { \
     __driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
-                      XML_GetCurrentLineNumber(data->parser), \
-                      XML_GetCurrentColumnNumber(data->parser)); \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser)); \
 } while (0)
 #define XML_ERROR(msg,args...) do { \
     __driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
-                      XML_GetCurrentLineNumber(data->parser), \
-                      XML_GetCurrentColumnNumber(data->parser), \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser), \
                       args); \
 } while (0)
 /** \brief Output a fatal error message and abort. */
 #define XML_FATAL1(msg) do { \
     fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
              data->name, \
-             XML_GetCurrentLineNumber(data->parser), \
-             XML_GetCurrentColumnNumber(data->parser)); \
+             (int) XML_GetCurrentLineNumber(data->parser),	\
+             (int) XML_GetCurrentColumnNumber(data->parser)); \
     abort();\
 } while (0)
 #define XML_FATAL(msg,args...) do { \
     fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
              data->name, \
-             XML_GetCurrentLineNumber(data->parser), \
-             XML_GetCurrentColumnNumber(data->parser), \
+             (int) XML_GetCurrentLineNumber(data->parser),	\
+             (int) XML_GetCurrentColumnNumber(data->parser),		\
              args); \
     abort();\
 } while (0)
diff --git a/src/mesa/drivers/dri/gamma/gamma_render.c b/src/mesa/drivers/dri/gamma/gamma_render.c
index 4b462f2252..a8fba499a5 100644
--- a/src/mesa/drivers/dri/gamma/gamma_render.c
+++ b/src/mesa/drivers/dri/gamma/gamma_render.c
@@ -193,7 +193,7 @@ static GLboolean gamma_run_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
index a31d54236c..d0225969b6 100644
--- a/src/mesa/drivers/dri/i810/i810render.c
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -144,7 +144,7 @@ static GLboolean i810_run_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index e5d8d27993..21d2fe6289 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -311,7 +311,7 @@ static void
 i915PolygonStipple(GLcontext * ctx, const GLubyte * mask)
 {
    struct i915_context *i915 = I915_CONTEXT(ctx);
-   const GLubyte *m = mask;
+   const GLubyte *m;
    GLubyte p[4];
    int i, j, k;
    int active = (ctx->Polygon.StippleFlag &&
@@ -323,6 +323,12 @@ i915PolygonStipple(GLcontext * ctx, const GLubyte * mask)
       i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
    }
 
+   /* Use the already unpacked stipple data from the context rather than the
+    * uninterpreted mask passed in.
+    */
+   mask = (const GLubyte *)ctx->PolygonStipple;
+   m = mask;
+
    p[0] = mask[12] & 0xf;
    p[0] |= p[0] << 4;
    p[1] = mask[8] & 0xf;
diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c
index 7b761a7b22..dfd02112ba 100644
--- a/src/mesa/drivers/dri/i915/i915_tex_layout.c
+++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c
@@ -54,7 +54,7 @@ static GLint step_offsets[6][2] = { {0, 2},
 };
 
 GLboolean
-i915_miptree_layout(struct intel_mipmap_tree * mt)
+i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
 {
    GLint level;
 
@@ -67,7 +67,7 @@ i915_miptree_layout(struct intel_mipmap_tree * mt)
          assert(lvlWidth == lvlHeight); /* cubemap images are square */
 
          /* double pitch for cube layouts */
-         mt->pitch = ((dim * mt->cpp * 2 + 3) & ~3) / mt->cpp;
+         mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
          mt->total_height = dim * 4;
 
          for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -107,7 +107,7 @@ i915_miptree_layout(struct intel_mipmap_tree * mt)
 
          /* Calculate the size of a single slice. 
           */
-         mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+         mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
 
          /* XXX: hardware expects/requires 9 levels at minimum.
           */
@@ -150,7 +150,7 @@ i915_miptree_layout(struct intel_mipmap_tree * mt)
          GLuint height = mt->height0;
 	 GLuint img_height;
 
-         mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+         mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
          mt->total_height = 0;
 
          for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -180,7 +180,7 @@ i915_miptree_layout(struct intel_mipmap_tree * mt)
 
 
 GLboolean
-i945_miptree_layout(struct intel_mipmap_tree * mt)
+i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
 {
    GLint level;
 
@@ -197,7 +197,7 @@ i945_miptree_layout(struct intel_mipmap_tree * mt)
           * or the final row of 4x4, 2x2 and 1x1 faces below this. 
           */
          if (dim > 32)
-            mt->pitch = ((dim * mt->cpp * 2 + 3) & ~3) / mt->cpp;
+            mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
          else
             mt->pitch = 14 * 8;
 
@@ -279,7 +279,7 @@ i945_miptree_layout(struct intel_mipmap_tree * mt)
          GLuint pack_y_pitch;
          GLuint level;
 
-         mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+         mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
          mt->total_height = 0;
 
          pack_y_pitch = MAX2(mt->height0, 2);
@@ -329,7 +329,7 @@ i945_miptree_layout(struct intel_mipmap_tree * mt)
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_RECTANGLE_ARB:
-         i945_miptree_layout_2d(mt);
+         i945_miptree_layout_2d(intel, mt);
          break;
    default:
       _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index 09684e87b4..1c45fd5dcd 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -246,7 +246,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
 
          state[I915_TEXREG_SS2] |=
             (SS2_SHADOW_ENABLE |
-             intel_translate_compare_func(tObj->CompareFunc));
+             intel_translate_shadow_compare_func(tObj->CompareFunc));
 
          minFilt = FILTER_4X4_FLAT;
          magFilt = FILTER_4X4_FLAT;
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c
index c67d906db0..f407f7ec47 100644
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -202,7 +202,7 @@ const struct dri_extension card_extensions[] = {
    {"GL_NV_blend_square", NULL},
    {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
    {"GL_NV_vertex_program1_1", NULL},
-/*     { "GL_SGIS_generate_mipmap",           NULL }, */
+   { "GL_SGIS_generate_mipmap", NULL },
    {NULL, NULL}
 };
 
@@ -544,8 +544,8 @@ intelInitContext(struct intel_context *intel,
 
 #if DO_DEBUG
    INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
-   if (!intel->ttm && (INTEL_DEBUG & DEBUG_BUFMGR))
-      dri_bufmgr_fake_set_debug(intel->bufmgr, GL_TRUE);
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
 #endif
 
    if (getenv("INTEL_NO_RAST")) {
@@ -591,8 +591,6 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 	 intel->first_swap_fence = NULL;
       }
 
-      dri_bufmgr_destroy(intel->bufmgr);
-
       if (release_texture_heaps) {
          /* This share group is about to go away, free our private
           * texture object data.
@@ -603,6 +601,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 
       /* free the Mesa context */
       _mesa_free_context_data(&intel->ctx);
+
+      dri_bufmgr_destroy(intel->bufmgr);
    }
 }
 
diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h
index 6f0051ed8b..528e749c33 100644
--- a/src/mesa/drivers/dri/i915/intel_context.h
+++ b/src/mesa/drivers/dri/i915/intel_context.h
@@ -112,6 +112,14 @@ struct intel_context
                                 struct intel_region * draw_region,
                                 struct intel_region * depth_region);
 
+      void (*meta_draw_quad)(struct intel_context *intel,
+			     GLfloat x0, GLfloat x1,
+			     GLfloat y0, GLfloat y1,
+			     GLfloat z,
+			     GLuint color, /* ARGB32 */
+			     GLfloat s0, GLfloat s1,
+			     GLfloat t0, GLfloat t1);
+
       void (*meta_color_mask) (struct intel_context * intel, GLboolean);
 
       void (*meta_stencil_replace) (struct intel_context * intel,
@@ -425,6 +433,7 @@ extern void intelInitStateFuncs(struct dd_function_table *functions);
 #define BLENDFACT_INV_CONST_ALPHA	0x0f
 #define BLENDFACT_MASK          	0x0f
 
+extern int intel_translate_shadow_compare_func(GLenum func);
 extern int intel_translate_compare_func(GLenum func);
 extern int intel_translate_stencil_op(GLenum op);
 extern int intel_translate_blend_factor(GLenum factor);
diff --git a/src/mesa/drivers/dri/i915/intel_ioctl.c b/src/mesa/drivers/dri/i915/intel_ioctl.c
index 37704d66ec..3cd8344b48 100644
--- a/src/mesa/drivers/dri/i915/intel_ioctl.c
+++ b/src/mesa/drivers/dri/i915/intel_ioctl.c
@@ -157,7 +157,7 @@ intel_exec_ioctl(struct intel_context *intel,
    execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
 			(((GLuint) intel->drawY) << 16));
 
-   execbuf.ops_list = (unsigned)start; // TODO
+   execbuf.ops_list = (unsigned long)start; // TODO
    execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
 
    if (drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c
index 9018e3daef..d733c5e874 100644..120000
--- a/src/mesa/drivers/dri/i915/intel_pixel.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel.c
@@ -1,120 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portionsalloc
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "enums.h"
-#include "state.h"
-#include "swrast/swrast.h"
-
-#include "intel_context.h"
-#include "intel_pixel.h"
-#include "intel_regions.h"
-
-
-/**
- * Check if any fragment operations are in effect which might effect
- * glDraw/CopyPixels.
- */
-GLboolean
-intel_check_blit_fragment_ops(GLcontext * ctx)
-{
-   if (ctx->NewState)
-      _mesa_update_state(ctx);
-
-   /* XXX Note: Scissor could be done with the blitter:
-    */
-   return !(ctx->_ImageTransferState ||
-            ctx->Color.AlphaEnabled ||
-            ctx->Depth.Test ||
-            ctx->Fog.Enabled ||
-            ctx->Scissor.Enabled ||
-            ctx->Stencil.Enabled ||
-            !ctx->Color.ColorMask[0] ||
-            !ctx->Color.ColorMask[1] ||
-            !ctx->Color.ColorMask[2] ||
-            !ctx->Color.ColorMask[3] ||
-            ctx->Texture._EnabledUnits || 
-	    ctx->FragmentProgram._Enabled ||
-	    ctx->Color.BlendEnabled);
-}
-
-
-GLboolean
-intel_check_meta_tex_fragment_ops(GLcontext * ctx)
-{
-   if (ctx->NewState)
-      _mesa_update_state(ctx);
-
-   /* Some of _ImageTransferState (scale, bias) could be done with
-    * fragment programs on i915.
-    */
-   return !(ctx->_ImageTransferState || ctx->Fog.Enabled ||     /* not done yet */
-            ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
-}
-
-/* The intel_region struct doesn't really do enough to capture the
- * format of the pixels in the region.  For now this code assumes that
- * the region is a display surface and hence is either ARGB8888 or
- * RGB565.
- * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd
- * know the buffer's pixel format.
- *
- * \param format  as given to glDraw/ReadPixels
- * \param type  as given to glDraw/ReadPixels
- */
-GLboolean
-intel_check_blit_format(struct intel_region * region,
-                        GLenum format, GLenum type)
-{
-   if (region->cpp == 4 &&
-       (type == GL_UNSIGNED_INT_8_8_8_8_REV ||
-        type == GL_UNSIGNED_BYTE) && format == GL_BGRA) {
-      return GL_TRUE;
-   }
-
-   if (region->cpp == 2 &&
-       type == GL_UNSIGNED_SHORT_5_6_5_REV && format == GL_BGR) {
-      return GL_TRUE;
-   }
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n",
-              __FUNCTION__, region->cpp,
-              _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
-
-   return GL_FALSE;
-}
-
-
-void
-intelInitPixelFuncs(struct dd_function_table *functions)
-{
-   functions->Accum = _swrast_Accum;
-   functions->Bitmap = _swrast_Bitmap;
-   functions->CopyPixels = intelCopyPixels;
-   functions->ReadPixels = intelReadPixels;
-   functions->DrawPixels = intelDrawPixels;
-}
+../intel/intel_pixel.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c
index 0bda2d863f..ee43360590 100644..120000
--- a/src/mesa/drivers/dri/i915/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c
@@ -1,382 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "state.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_buffers.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_tris.h"
-#include "intel_pixel.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-static struct intel_region *
-copypix_src_region(struct intel_context *intel, GLenum type)
-{
-   switch (type) {
-   case GL_COLOR:
-      return intel_readbuf_region(intel);
-   case GL_DEPTH:
-      /* Don't think this is really possible execpt at 16bpp, when we have no stencil.
-       */
-      if (intel->depth_region && intel->depth_region->cpp == 2)
-         return intel->depth_region;
-   case GL_STENCIL:
-      /* Don't think this is really possible. 
-       */
-      break;
-   case GL_DEPTH_STENCIL_EXT:
-      /* Does it matter whether it is stencil/depth or depth/stencil?
-       */
-      return intel->depth_region;
-   default:
-      break;
-   }
-
-   return NULL;
-}
-
-
-/**
- * Check if any fragment operations are in effect which might effect
- * glCopyPixels.  Differs from intel_check_blit_fragment_ops in that
- * we allow Scissor.
- */
-static GLboolean
-intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
-{
-   if (ctx->NewState)
-      _mesa_update_state(ctx);
-
-   /* Could do logicop with the blitter: 
-    */
-   return !(ctx->_ImageTransferState ||
-            ctx->Color.AlphaEnabled ||
-            ctx->Depth.Test ||
-            ctx->Fog.Enabled ||
-            ctx->Stencil.Enabled ||
-            !ctx->Color.ColorMask[0] ||
-            !ctx->Color.ColorMask[1] ||
-            !ctx->Color.ColorMask[2] ||
-            !ctx->Color.ColorMask[3] ||
-            ctx->Texture._EnabledUnits ||
-	    ctx->FragmentProgram._Enabled ||
-	    ctx->Color.BlendEnabled);
-}
-
-/* Doesn't work for overlapping regions.  Could do a double copy or
- * just fallback.
- */
-static GLboolean
-do_texture_copypixels(GLcontext * ctx,
-                      GLint srcx, GLint srcy,
-                      GLsizei width, GLsizei height,
-                      GLint dstx, GLint dsty, GLenum type)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   struct intel_region *src = copypix_src_region(intel, type);
-   GLenum src_format;
-   GLenum src_type;
-
-   DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, 
-       srcx, srcy, width, height, dstx, dsty);
-
-   if (!src || !dst || type != GL_COLOR)
-      return GL_FALSE;
-
-   /* Can't handle overlapping regions.  Don't have sufficient control
-    * over rasterization to pull it off in-place.  Punt on these for
-    * now.
-    * 
-    * XXX: do a copy to a temporary. 
-    */
-   if (src->buffer == dst->buffer) {
-      drm_clip_rect_t srcbox;
-      drm_clip_rect_t dstbox;
-      drm_clip_rect_t tmp;
-
-      srcbox.x1 = srcx;
-      srcbox.y1 = srcy;
-      srcbox.x2 = srcx + width;
-      srcbox.y2 = srcy + height;
-
-      dstbox.x1 = dstx;
-      dstbox.y1 = dsty;
-      dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
-      dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
-
-      DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
-      DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
-	  width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
-
-      if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
-         DBG("%s: regions overlap\n", __FUNCTION__);
-         return GL_FALSE;
-      }
-   }
-
-   intelFlush(&intel->ctx);
-
-   intel->vtbl.install_meta_state(intel);
-
-   /* Is this true?  Also will need to turn depth testing on according
-    * to state:
-    */
-   intel->vtbl.meta_no_stencil_write(intel);
-   intel->vtbl.meta_no_depth_write(intel);
-
-   /* Set the 3d engine to draw into the destination region:
-    */
-   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
-   intel->vtbl.meta_import_pixel_state(intel);
-
-   if (src->cpp == 2) {
-      src_format = GL_RGB;
-      src_type = GL_UNSIGNED_SHORT_5_6_5;
-   }
-   else {
-      src_format = GL_BGRA;
-      src_type = GL_UNSIGNED_BYTE;
-   }
-
-   /* Set the frontbuffer up as a large rectangular texture.
-    */
-   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
-                                         src->pitch,
-                                         src->height, src_format, src_type)) {
-      intel->vtbl.leave_meta_state(intel);
-      return GL_FALSE;
-   }
-
-
-   intel->vtbl.meta_texture_blend_replace(intel);
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-
-      srcy = dPriv->h - srcy - height;  /* convert from gl to hardware coords */
-
-      srcx += dPriv->x;
-      srcy += dPriv->y;
-
-      /* Clip against the source region.  This is the only source
-       * clipping we do.  XXX: Just set the texcord wrap mode to clamp
-       * or similar.
-       *
-       */
-      if (0) {
-         GLint orig_x = srcx;
-         GLint orig_y = srcy;
-
-         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
-                                   &srcx, &srcy, &width, &height))
-            goto out;
-
-         dstx += srcx - orig_x;
-         dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
-      }
-
-      /* Just use the regular cliprect mechanism...  Does this need to
-       * even hold the lock???
-       */
-      intel_meta_draw_quad(intel, 
-			   dstx, 
-			   dstx + width * ctx->Pixel.ZoomX, 
-			   dPriv->h - (dsty + height * ctx->Pixel.ZoomY), 
-			   dPriv->h - (dsty), 0,   /* XXX: what z value? */
-                           0x00ff00ff,
-                           srcx, srcx + width, srcy, srcy + height);
-
-    out:
-      intel->vtbl.leave_meta_state(intel);
-      intel_batchbuffer_flush(intel->batch);
-   }
-   UNLOCK_HARDWARE(intel);
-
-   DBG("%s: success\n", __FUNCTION__);
-   return GL_TRUE;
-}
-
-
-
-
-
-/**
- * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
- */
-static GLboolean
-do_blit_copypixels(GLcontext * ctx,
-                   GLint srcx, GLint srcy,
-                   GLsizei width, GLsizei height,
-                   GLint dstx, GLint dsty, GLenum type)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   struct intel_region *src = copypix_src_region(intel, type);
-
-   /* Copypixels can be more than a straight copy.  Ensure all the
-    * extra operations are disabled:
-    */
-   if (!intel_check_copypixel_blit_fragment_ops(ctx) ||
-       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
-      return GL_FALSE;
-
-   if (!src || !dst)
-      return GL_FALSE;
-
-
-
-   intelFlush(&intel->ctx);
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t dest_rect;
-      GLint nbox = dPriv->numClipRects;
-      GLint delta_x = 0;
-      GLint delta_y = 0;
-      GLuint i;
-
-      /* Do scissoring in GL coordinates:
-       */
-      if (ctx->Scissor.Enabled)
-      {
-	 GLint x = ctx->Scissor.X;
-	 GLint y = ctx->Scissor.Y;
-	 GLuint w = ctx->Scissor.Width;
-	 GLuint h = ctx->Scissor.Height;
-	 GLint dx = dstx - srcx;
-         GLint dy = dsty - srcy;
-
-         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
-            goto out;
-	 
-         srcx = dstx - dx;
-         srcy = dsty - dy;
-      }
-
-      /* Convert from GL to hardware coordinates:
-       */
-      dsty = dPriv->h - dsty - height;  
-      srcy = dPriv->h - srcy - height;  
-      dstx += dPriv->x;
-      dsty += dPriv->y;
-      srcx += dPriv->x;
-      srcy += dPriv->y;
-
-      /* Clip against the source region.  This is the only source
-       * clipping we do.  Dst is clipped with cliprects below.
-       */
-      {
-         delta_x = srcx - dstx;
-         delta_y = srcy - dsty;
-
-         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
-                                   &srcx, &srcy, &width, &height))
-            goto out;
-
-         dstx = srcx - delta_x;
-         dsty = srcy - delta_y;
-      }
-
-      dest_rect.x1 = dstx;
-      dest_rect.y1 = dsty;
-      dest_rect.x2 = dstx + width;
-      dest_rect.y2 = dsty + height;
-
-      /* Could do slightly more clipping: Eg, take the intersection of
-       * the existing set of cliprects and those cliprects translated
-       * by delta_x, delta_y:
-       * 
-       * This code will not overwrite other windows, but will
-       * introduce garbage when copying from obscured window regions.
-       */
-      for (i = 0; i < nbox; i++) {
-         drm_clip_rect_t rect;
-
-         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
-            continue;
-
-
-         intelEmitCopyBlit(intel, dst->cpp, 
-			   src->pitch, src->buffer, 0, src->tiled,
-			   dst->pitch, dst->buffer, 0, dst->tiled,
-			   rect.x1 + delta_x, 
-			   rect.y1 + delta_y,       /* srcx, srcy */
-                           rect.x1, rect.y1,    /* dstx, dsty */
-                           rect.x2 - rect.x1, rect.y2 - rect.y1,
-			   ctx->Color.ColorLogicOpEnabled ?
-			   ctx->Color.LogicOp : GL_COPY);
-      }
-
-    out:
-      intel_batchbuffer_flush(intel->batch);
-   }
-   UNLOCK_HARDWARE(intel);
-
-   DBG("%s: success\n", __FUNCTION__);
-   return GL_TRUE;
-}
-
-
-void
-intelCopyPixels(GLcontext * ctx,
-                GLint srcx, GLint srcy,
-                GLsizei width, GLsizei height,
-                GLint destx, GLint desty, GLenum type)
-{
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
-      return;
-
-   if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
-      return;
-
-   DBG("fallback to _swrast_CopyPixels\n");
-
-   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
-}
+../intel/intel_pixel_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_draw.c b/src/mesa/drivers/dri/i915/intel_pixel_draw.c
index 0fea9a1d01..8431a24edf 100644..120000
--- a/src/mesa/drivers/dri/i915/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel_draw.c
@@ -1,386 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portionsalloc
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "bufferobj.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_buffers.h"
-#include "intel_regions.h"
-#include "intel_pixel.h"
-#include "intel_buffer_objects.h"
-#include "intel_tris.h"
-
-
-
-static GLboolean
-do_texture_drawpixels(GLcontext * ctx,
-                      GLint x, GLint y,
-                      GLsizei width, GLsizei height,
-                      GLenum format, GLenum type,
-                      const struct gl_pixelstore_attrib *unpack,
-                      const GLvoid * pixels)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
-   GLuint rowLength = unpack->RowLength ? unpack->RowLength : width;
-   GLuint src_offset;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   intelFlush(&intel->ctx);
-   intel->vtbl.render_start(intel);
-   intel->vtbl.emit_state(intel);
-
-   if (!dst)
-      return GL_FALSE;
-
-   if (src) {
-      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
-                                     format, type, pixels)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
-         return GL_TRUE;
-      }
-   }
-   else {
-      /* PBO only for now:
-       */
-/*       _mesa_printf("%s - not PBO\n", __FUNCTION__); */
-      return GL_FALSE;
-   }
-
-   /* There are a couple of things we can't do yet, one of which is
-    * set the correct state for pixel operations when GL texturing is
-    * enabled.  That's a pretty rare state and probably not worth the
-    * effort.  A completely device-independent version of this may do
-    * more.
-    *
-    * Similarly, we make no attempt to merge metaops processing with
-    * an enabled fragment program, though it would certainly be
-    * possible.
-    */
-   if (!intel_check_meta_tex_fragment_ops(ctx)) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad GL fragment state for metaops texture\n",
-                      __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   intel->vtbl.install_meta_state(intel);
-
-
-   /* Is this true?  Also will need to turn depth testing on according
-    * to state:
-    */
-   intel->vtbl.meta_no_stencil_write(intel);
-   intel->vtbl.meta_no_depth_write(intel);
-
-   /* Set the 3d engine to draw into the destination region:
-    */
-   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
-   intel->vtbl.meta_import_pixel_state(intel);
-
-   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
-                                             format, type, 0, 0, 0);
-
-
-   /* Setup the pbo up as a rectangular texture, if possible.
-    *
-    * TODO: This is almost always possible if the i915 fragment
-    * program is adjusted to correctly swizzle the sampled colors.
-    * The major exception is any 24bit texture, like RGB888, for which
-    * there is no hardware support.  
-    */
-   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset,
-                                         rowLength, height, format, type)) {
-      intel->vtbl.leave_meta_state(intel);
-      return GL_FALSE;
-   }
-
-   intel->vtbl.meta_texture_blend_replace(intel);
-
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      GLint srcx, srcy;
-      GLint dstx, dsty;
-
-      dstx = x;
-      dsty = dPriv->h - (y + height);
-
-      srcx = 0;                 /* skiprows/pixels already done */
-      srcy = 0;
-
-      if (0) {
-         const GLint orig_x = dstx;
-         const GLint orig_y = dsty;
-
-         if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height,
-                                   &dstx, &dsty, &width, &height))
-            goto out;
-
-         srcx += dstx - orig_x;
-         srcy += dsty - orig_y;
-      }
-
-
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height);
-
-      /* Must use the regular cliprect mechanism in order to get the
-       * drawing origin set correctly.  Otherwise scissor state is in
-       * incorrect coordinate space.  Does this even need to hold the
-       * lock???
-       */
-      intel_meta_draw_quad(intel,
-                           dstx, dstx + width * ctx->Pixel.ZoomX,
-                           dPriv->h - (y + height * ctx->Pixel.ZoomY),
-                           dPriv->h - (y),
-                           -ctx->Current.RasterPos[2] * .5,
-                           0x00ff00ff,
-                           srcx, srcx + width, srcy + height, srcy);
-    out:
-      intel->vtbl.leave_meta_state(intel);
-      intel_batchbuffer_flush(intel->batch);
-   }
-   UNLOCK_HARDWARE(intel);
-   return GL_TRUE;
-}
-
-
-
-
-
-/* Pros:  
- *   - no waiting for idle before updating framebuffer.
- *   
- * Cons:
- *   - if upload is by memcpy, this may actually be slower than fallback path.
- *   - uploads the whole image even if destination is clipped
- *   
- * Need to benchmark.
- *
- * Given the questions about performance, implement for pbo's only.
- * This path is definitely a win if the pbo is already in agp.  If it
- * turns out otherwise, we can add the code necessary to upload client
- * data to agp space before performing the blit.  (Though it may turn
- * out to be better/simpler just to use the texture engine).
- */
-static GLboolean
-do_blit_drawpixels(GLcontext * ctx,
-                   GLint x, GLint y,
-                   GLsizei width, GLsizei height,
-                   GLenum format, GLenum type,
-                   const struct gl_pixelstore_attrib *unpack,
-                   const GLvoid * pixels)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dest = intel_drawbuf_region(intel);
-   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
-   GLuint src_offset;
-   GLuint rowLength;
-   dri_fence *fence = NULL;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s\n", __FUNCTION__);
-
-
-   if (!dest) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - no dest\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if (src) {
-      /* This validation should be done by core mesa:
-       */
-      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
-                                     format, type, pixels)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
-         return GL_TRUE;
-      }
-   }
-   else {
-      /* PBO only for now:
-       */
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - not PBO\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if (!intel_check_blit_format(dest, format, type)) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if (!intel_check_blit_fragment_ops(ctx)) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad GL fragment state for blitter\n",
-                      __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if (ctx->Pixel.ZoomX != 1.0F) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-
-   if (unpack->RowLength > 0)
-      rowLength = unpack->RowLength;
-   else
-      rowLength = width;
-
-   if (ctx->Pixel.ZoomY == -1.0F) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
-      return GL_FALSE;          /* later */
-      y -= height;
-   }
-   else if (ctx->Pixel.ZoomY == 1.0F) {
-      rowLength = -rowLength;
-   }
-   else {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
-                                             format, type, 0, 0, 0);
-
-   intelFlush(&intel->ctx);
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      int nbox = dPriv->numClipRects;
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t rect;
-      drm_clip_rect_t dest_rect;
-      dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
-      int i;
-
-      dest_rect.x1 = dPriv->x + x;
-      dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
-      dest_rect.x2 = dest_rect.x1 + width;
-      dest_rect.y2 = dest_rect.y1 + height;
-
-      for (i = 0; i < nbox; i++) {
-         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
-            continue;
-
-         intelEmitCopyBlit(intel,
-                           dest->cpp,
-                           rowLength, src_buffer, src_offset, GL_FALSE,
-                           dest->pitch, dest->buffer, 0, dest->tiled,
-                           rect.x1 - dest_rect.x1,
-                           rect.y2 - dest_rect.y2,
-                           rect.x1,
-                           rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1,
-			   ctx->Color.ColorLogicOpEnabled ?
-			   ctx->Color.LogicOp : GL_COPY);
-      }
-      intel_batchbuffer_flush(intel->batch);
-      fence = intel->batch->last_fence;
-      dri_fence_reference(fence);
-   }
-   UNLOCK_HARDWARE(intel);
-
-   if (fence) {
-      dri_fence_wait(fence);
-      dri_fence_unreference(fence);
-   }
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s - DONE\n", __FUNCTION__);
-
-   return GL_TRUE;
-}
-
-
-
-void
-intelDrawPixels(GLcontext * ctx,
-                GLint x, GLint y,
-                GLsizei width, GLsizei height,
-                GLenum format,
-                GLenum type,
-                const struct gl_pixelstore_attrib *unpack,
-                const GLvoid * pixels)
-{
-   if (do_blit_drawpixels(ctx, x, y, width, height, format, type,
-                          unpack, pixels))
-      return;
-
-   if (do_texture_drawpixels(ctx, x, y, width, height, format, type,
-                             unpack, pixels))
-      return;
-
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
-
-   if (ctx->FragmentProgram._Current == ctx->FragmentProgram._TexEnvProgram) {
-      /*
-       * We don't want the i915 texenv program to be applied to DrawPixels.
-       * This is really just a performance optimization (mesa will other-
-       * wise happily run the fragment program on each pixel in the image).
-       */
-      struct gl_fragment_program *fpSave = ctx->FragmentProgram._Current;
-   /* can't just set current frag prog to 0 here as on buffer resize
-      we'll get new state checks which will segfault. Remains a hack. */
-      ctx->FragmentProgram._Current = NULL;
-      ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE;
-      ctx->FragmentProgram._Active = GL_FALSE;
-      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
-                          unpack, pixels );
-      ctx->FragmentProgram._Current = fpSave;
-      ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
-      ctx->FragmentProgram._Active = GL_TRUE;
-   }
-   else {
-      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
-                          unpack, pixels );
-   }
-}
+../intel/intel_pixel_draw.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index c8b6d308d9..473ddb8393 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -216,7 +216,7 @@ intel_run_render(GLcontext * ctx, struct tnl_pipeline_stage *stage)
    tnl->Driver.Render.Start(ctx);
 
    for (i = 0; i < VB->PrimitiveCount; i++) {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c
index 271511037e..558f068a42 100644
--- a/src/mesa/drivers/dri/i915/intel_state.c
+++ b/src/mesa/drivers/dri/i915/intel_state.c
@@ -39,6 +39,32 @@
 #include "intel_regions.h"
 #include "swrast/swrast.h"
 
+int 
+intel_translate_shadow_compare_func( GLenum func )
+{
+   switch(func) {
+   case GL_NEVER: 
+       return COMPAREFUNC_ALWAYS; 
+   case GL_LESS: 
+       return COMPAREFUNC_LEQUAL; 
+   case GL_LEQUAL: 
+       return COMPAREFUNC_LESS;
+   case GL_GREATER: 
+       return COMPAREFUNC_GEQUAL; 
+   case GL_GEQUAL: 
+      return COMPAREFUNC_GREATER; 
+   case GL_NOTEQUAL: 
+      return COMPAREFUNC_EQUAL; 
+   case GL_EQUAL: 
+      return COMPAREFUNC_NOTEQUAL; 
+   case GL_ALWAYS: 
+       return COMPAREFUNC_NEVER; 
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+   return COMPAREFUNC_NEVER; 
+}
+
 int
 intel_translate_compare_func(GLenum func)
 {
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index 4b45dc065c..61b0bb3fd3 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -1066,7 +1066,7 @@ union fi
 /**********************************************************************/
 /*             Used only with the metaops callbacks.                  */
 /**********************************************************************/
-void
+static void
 intel_meta_draw_poly(struct intel_context *intel,
                      GLuint n,
                      GLfloat xy[][2],
@@ -1074,8 +1074,10 @@ intel_meta_draw_poly(struct intel_context *intel,
 {
    union fi *vb;
    GLint i;
+   GLboolean was_locked = intel->locked;
 
-   LOCK_HARDWARE(intel);
+   if (!was_locked)
+       LOCK_HARDWARE(intel);
 
    /* All 3d primitives should be emitted with INTEL_BATCH_CLIPRECTS,
     * otherwise the drawing origin (DR4) might not be set correctly.
@@ -1094,10 +1096,12 @@ intel_meta_draw_poly(struct intel_context *intel,
    }
 
    INTEL_FIREVERTICES(intel);
-   UNLOCK_HARDWARE(intel);
+
+   if (!was_locked)
+       UNLOCK_HARDWARE(intel);
 }
 
-void
+static void
 intel_meta_draw_quad(struct intel_context *intel,
                      GLfloat x0, GLfloat x1,
                      GLfloat y0, GLfloat y1,
@@ -1139,6 +1143,7 @@ intel_meta_draw_quad(struct intel_context *intel,
 void
 intelInitTriFuncs(GLcontext * ctx)
 {
+   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    static int firsttime = 1;
 
@@ -1155,4 +1160,6 @@ intelInitTriFuncs(GLcontext * ctx)
    tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
    tnl->Driver.Render.CopyPV = _tnl_copy_pv;
    tnl->Driver.Render.Interp = _tnl_interp;
+
+   intel->vtbl.meta_draw_quad = intel_meta_draw_quad;
 }
diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h
index b7bae8cd3b..021e5c6450 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.h
+++ b/src/mesa/drivers/dri/i915/intel_tris.h
@@ -51,19 +51,4 @@ extern void intelWrapInlinePrimitive(struct intel_context *intel);
 GLuint *intelExtendInlinePrimitive(struct intel_context *intel,
                                    GLuint dwords);
 
-
-void intel_meta_draw_quad(struct intel_context *intel,
-                          GLfloat x0, GLfloat x1,
-                          GLfloat y0, GLfloat y1,
-                          GLfloat z,
-                          GLuint color,
-                          GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1);
-
-void intel_meta_draw_poly(struct intel_context *intel,
-                          GLuint n,
-                          GLfloat xy[][2],
-                          GLfloat z, GLuint color, GLfloat tex[][2]);
-
-
-
 #endif
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 5b1a83bccc..a4d125f926 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -12,6 +12,8 @@ DRIVER_SOURCES = \
 	intel_bufmgr_ttm.c \
 	intel_context.c \
 	intel_decode.c \
+	intel_depthstencil.c \
+	intel_fbo.c \
 	intel_ioctl.c \
 	intel_mipmap_tree.c \
 	intel_regions.c \
@@ -21,8 +23,11 @@ DRIVER_SOURCES = \
 	intel_pixel_bitmap.c \
 	intel_state.c \
 	intel_tex.c \
+	intel_tex_copy.c \
 	intel_tex_format.c \
+	intel_tex_image.c \
 	intel_tex_layout.c \
+	intel_tex_subimage.c \
 	intel_tex_validate.c \
 	brw_cc.c \
 	brw_clip.c \
@@ -54,7 +59,6 @@ DRIVER_SOURCES = \
 	brw_state_batch.c \
 	brw_state_cache.c \
 	brw_state_dump.c \
-	brw_state_pool.c \
 	brw_state_upload.c \
 	brw_tex.c \
 	brw_tex_layout.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index 8a1d1527db..357704c577 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -46,7 +46,8 @@ static void upload_cc_vp( struct brw_context *brw )
    ccv.min_depth = 0.0;
    ccv.max_depth = 1.0;
 
-   brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv );
+   dri_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -76,8 +77,8 @@ static void upload_cc_unit( struct brw_context *brw )
       cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0];
       cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0];
 
-      if (brw->attribs.Stencil->TestTwoSide) {
-	 cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide;
+      if (brw->attribs.Stencil->_TestTwoSide) {
+	 cc.cc0.bf_stencil_enable = brw->attribs.Stencil->_TestTwoSide;
 	 cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]);
 	 cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]);
 	 cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]);
@@ -90,7 +91,8 @@ static void upload_cc_unit( struct brw_context *brw )
       /* Not really sure about this:
        */
       if (brw->attribs.Stencil->WriteMask[0] ||
-	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+	  (brw->attribs.Stencil->_TestTwoSide &&
+	   brw->attribs.Stencil->WriteMask[1]))
 	 cc.cc0.stencil_write_enable = 1;
    }
 
@@ -152,12 +154,24 @@ static void upload_cc_unit( struct brw_context *brw )
    }
  
    /* CACHE_NEW_CC_VP */
-   cc.cc4.cc_viewport_state_offset =  brw->cc.vp_gs_offset >> 5;
+   cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
  
    if (INTEL_DEBUG & DEBUG_STATS)
       cc.cc5.statistics_enable = 1; 
 
-   brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc );
+   dri_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_cache_data( &brw->cache, BRW_CC_UNIT, &cc,
+				      &brw->cc.vp_bo, 1);
+}
+
+static void emit_reloc_cc_unit(struct brw_context *brw)
+{
+   /* Emit CC viewport relocation */
+   dri_emit_reloc(brw->cc.state_bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  0,
+		  offsetof(struct brw_cc_unit_state, cc4),
+		  brw->cc.vp_bo);
 }
 
 const struct brw_tracked_state brw_cc_unit = {
@@ -166,7 +180,8 @@ const struct brw_tracked_state brw_cc_unit = {
       .brw = 0,
       .cache = CACHE_NEW_CC_VP
    },
-   .update = upload_cc_unit
+   .update = upload_cc_unit,
+   .emit_reloc = emit_reloc_cc_unit,
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 8287fd9edf..e6f3f63126 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -119,28 +119,16 @@ static void compile_clip_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG],
-						&c.key,
-						sizeof(c.key),
-						program,
-						program_size,
-						&c.prog_data,
-						&brw->clip.prog_data );
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_upload_cache( &brw->cache,
+					 BRW_CLIP_PROG,
+					 &c.key, sizeof(c.key),
+					 NULL, 0,
+					 program, program_size,
+					 &c.prog_data,
+					 &brw->clip.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_clip_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_CLIP_PROG], 
-			   key, sizeof(*key),
-			   &brw->clip.prog_data,
-			   &brw->clip.prog_gs_offset);
-}
-
-
-
-
 /* Calculate interpolants for triangle and line rasterization.
  */
 static void upload_clip_prog( struct brw_context *brw )
@@ -252,7 +240,12 @@ static void upload_clip_prog( struct brw_context *brw )
       }
    }
 
-   if (!search_cache(brw, &key))
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+					&key, sizeof(key),
+					NULL, 0,
+					&brw->clip.prog_data);
+   if (brw->clip.prog_bo == NULL)
       compile_clip_prog( brw, &key );
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index ba2f0edf51..7640a39b45 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -45,7 +45,8 @@ static void upload_clip_unit( struct brw_context *brw )
    /* CACHE_NEW_CLIP_PROG */
    clip.thread0.grf_reg_count =
       ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1;
-   clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
+   /* reloc */
+   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
    clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
    clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
    clip.clip5.clip_mode = brw->clip.prog_data->clip_mode;
@@ -79,9 +80,24 @@ static void upload_clip_unit( struct brw_context *brw )
    clip.viewport_ymin = -1;
    clip.viewport_ymax = 1;
 
-   brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip );
+   brw->clip.thread0_delta = clip.thread0.grf_reg_count << 1;
+
+   dri_bo_unreference(brw->clip.state_bo);
+   brw->clip.state_bo = brw_cache_data( &brw->cache, BRW_CLIP_UNIT, &clip,
+					&brw->clip.prog_bo, 1);
 }
 
+static void emit_reloc_clip_unit(struct brw_context *brw)
+{
+   if (!brw->metaops.active) {
+      /* Emit clip program relocation */
+      dri_emit_reloc(brw->clip.state_bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     brw->clip.thread0_delta,
+		     offsetof(struct brw_clip_unit_state, thread0),
+		     brw->clip.prog_bo);
+   }
+}
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
@@ -90,5 +106,6 @@ const struct brw_tracked_state brw_clip_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .update = upload_clip_unit
+   .update = upload_clip_unit,
+   .emit_reloc = emit_reloc_clip_unit,
 };
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 4654ab1ddf..3c463c3b9a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -169,8 +169,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
    brw->state.dirty.mesa = ~0;
    brw->state.dirty.brw = ~0;
 
-   memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
-
    brw->emit_state_always = 0;
 
    ctx->FragmentProgram._MaintainTexEnvProgram = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 68afea111d..a5ef058e8c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -139,8 +139,13 @@ struct brw_context;
 
 
 struct brw_state_flags {
+   /** State update flags signalled by mesa internals */
    GLuint mesa;
+   /** State update flags signalled by brw_state_cache.c searches */
    GLuint cache;
+   /**
+    * State update flags signalled as the result of brw_tracked_state updates
+    */
    GLuint brw;
 };
 
@@ -232,30 +237,44 @@ struct brw_vs_ouput_sizes {
 #define BRW_MAX_TEX_UNIT 8
 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
 
-/* Create a fixed sized struct for caching binding tables:
- */
-struct brw_surface_binding_table {
-   GLuint surf_ss_offset[BRW_WM_MAX_SURF];
-};
-
-
-struct brw_cache;
-
-struct brw_mem_pool {
-   dri_bo *buffer;
-
-   GLuint size;
-   GLuint offset;		/* offset of first free byte */
+enum brw_cache_id {
+   BRW_CC_VP,
+   BRW_CC_UNIT,
+   BRW_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER,
+   BRW_WM_UNIT,
+   BRW_SF_PROG,
+   BRW_SF_VP,
+   BRW_SF_UNIT,
+   BRW_VS_UNIT,
+   BRW_VS_PROG,
+   BRW_GS_UNIT,
+   BRW_GS_PROG,
+   BRW_CLIP_VP,
+   BRW_CLIP_UNIT,
+   BRW_CLIP_PROG,
+   BRW_SS_SURFACE,
+   BRW_SS_SURF_BIND,
 
-   struct brw_context *brw;
+   BRW_MAX_CACHE
 };
 
 struct brw_cache_item {
+   /**
+    * Effectively part of the key, cache_id identifies what kind of state
+    * buffer is involved, and also which brw->state.dirty.cache flag should
+    * be set when this cache item is chosen.
+    */
+   enum brw_cache_id cache_id;
+   /** 32-bit hash of the key data */
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
    const void *key;
+   dri_bo **reloc_bufs;
+   GLuint nr_reloc_bufs;
 
-   GLuint offset;		/* offset within pool's buffer */
+   dri_bo *bo;
    GLuint data_size;
 
    struct brw_cache_item *next;
@@ -264,20 +283,19 @@ struct brw_cache_item {
 
 
 struct brw_cache {
-   GLuint id;
-
-   const char *name;
-
    struct brw_context *brw;
-   struct brw_mem_pool *pool;
 
    struct brw_cache_item **items;
    GLuint size, n_items;
-   
-   GLuint key_size;		/* for fixed-size keys */
-   GLuint aux_size;
 
-   GLuint last_addr;			/* offset of active item */
+   GLuint key_size[BRW_MAX_CACHE];		/* for fixed-size keys */
+   GLuint aux_size[BRW_MAX_CACHE];
+   char *name[BRW_MAX_CACHE];
+
+   /* Record of the last BOs chosen for each cache_id.  Used to set
+    * brw->state.dirty.cache when a new cache item is chosen.
+    */
+   dri_bo *last_bo[BRW_MAX_CACHE];
 };
 
 
@@ -314,33 +332,6 @@ struct brw_tracked_state {
    GLboolean always_update;
 };
 
-
-enum brw_cache_id {
-   BRW_CC_VP,
-   BRW_CC_UNIT,
-   BRW_WM_PROG,
-   BRW_SAMPLER_DEFAULT_COLOR,
-   BRW_SAMPLER,
-   BRW_WM_UNIT,
-   BRW_SF_PROG,
-   BRW_SF_VP,
-   BRW_SF_UNIT,
-   BRW_VS_UNIT,
-   BRW_VS_PROG,
-   BRW_GS_UNIT,
-   BRW_GS_PROG,
-   BRW_CLIP_VP,
-   BRW_CLIP_UNIT,
-   BRW_CLIP_PROG,
-
-   /* These two are in the SS pool:
-    */
-   BRW_SS_SURFACE,
-   BRW_SS_SURF_BIND,
-
-   BRW_MAX_CACHE
-};
-
 /* Flags for brw->state.cache.
  */
 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
@@ -362,16 +353,6 @@ enum brw_cache_id {
 #define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE)
 #define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND)
 
-
-
-
-enum brw_mempool_id {
-   BRW_GS_POOL,
-   BRW_SS_POOL,
-   BRW_MAX_POOL
-};
-
-
 struct brw_cached_batch_item {
    struct header *header;
    GLuint sz;
@@ -442,8 +423,7 @@ struct brw_context
    } state;
 
    struct brw_state_pointers attribs;
-   struct brw_mem_pool pool[BRW_MAX_POOL];
-   struct brw_cache cache[BRW_MAX_CACHE];
+   struct brw_cache cache;
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
@@ -551,7 +531,7 @@ struct brw_context
        */
       struct brw_tracked_state tracked_state;
 
-      GLuint gs_offset;
+      dri_bo *curbe_bo;
 
       GLfloat *last_buf;
       GLuint last_bufsz;
@@ -560,33 +540,38 @@ struct brw_context
    struct {
       struct brw_vs_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;	
+      GLuint thread0_delta;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } vs;
 
    struct {
       struct brw_gs_prog_data *prog_data;
 
       GLboolean prog_active;
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;	
+      GLuint thread0_delta;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;	
+      GLuint thread0_delta;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } clip;
 
 
    struct {
       struct brw_sf_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;
+      GLuint thread0_delta;
+      GLuint sf5_delta;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } sf;
 
    struct {
@@ -599,10 +584,12 @@ struct brw_context
 
 
       /**
-       * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
+       * Array of sampler state uploaded at sampler_bo of BRW_SAMPLER
        * cache
        */
       struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+      /** Array of sampler default colors (texture border color), per unit */
+      dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
       GLuint nr_surfaces;      
@@ -612,19 +599,24 @@ struct brw_context
       GLuint scratch_buffer_size;
 
       GLuint sampler_count;
-      GLuint sampler_gs_offset;
+      dri_bo *sampler_bo;
 
-      struct brw_surface_binding_table bind;
-      GLuint bind_ss_offset;
+      /** Binding table of pointers to surf_bo entries */
+      dri_bo *bind_bo;
+      dri_bo *surf_bo[BRW_WM_MAX_SURF];
 
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;
+      GLuint thread0_delta;
+      GLuint thread2_delta;
+      GLuint wm4_delta;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } wm;
 
 
    struct {
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } cc;
 
    
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 4007dbf9e9..5d81703d36 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -188,7 +188,6 @@ static void upload_constant_buffer(struct brw_context *brw)
    GLcontext *ctx = &brw->intel.ctx;
    struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
    struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
-   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
    GLuint sz = brw->curbe.total_size;
    GLuint bufsz = sz * 16 * sizeof(GLfloat);
    GLfloat *buf;
@@ -291,7 +290,6 @@ static void upload_constant_buffer(struct brw_context *brw)
        bufsz == brw->curbe.last_bufsz &&
        memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
       free(buf);
-/*       return; */
    } 
    else {
       if (brw->curbe.last_buf)
@@ -299,20 +297,16 @@ static void upload_constant_buffer(struct brw_context *brw)
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
 
-      
-      if (!brw_pool_alloc(pool, 
-			  bufsz,
-			  1 << 6,
-			  &brw->curbe.gs_offset)) {
-	 _mesa_printf("out of GS memory for curbe\n");
-	 assert(0);
-	 return;
-      }
-            
+      dri_bo_unreference(brw->curbe.curbe_bo);
+      brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+					 bufsz, 1 << 6,
+					 DRM_BO_FLAG_MEM_LOCAL |
+					 DRM_BO_FLAG_CACHED |
+					 DRM_BO_FLAG_CACHED_MAPPED);
 
       /* Copy data to the buffer:
        */
-      dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
+      dri_bo_subdata(brw->curbe.curbe_bo, 0, bufsz, buf);
    }
 
    /* Because this provokes an action (ie copy the constants into the
@@ -330,7 +324,8 @@ static void upload_constant_buffer(struct brw_context *brw)
     */
    BEGIN_BATCH(2, INTEL_BATCH_NO_CLIPRECTS);
    OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
-   OUT_BATCH(brw->curbe.gs_offset | (sz - 1));
+   OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+	     (sz - 1));
    ADVANCE_BATCH();
 }
 
@@ -350,6 +345,7 @@ const struct brw_tracked_state brw_constant_buffer = {
 	       BRW_NEW_CURBE_OFFSETS),
       .cache = (CACHE_NEW_WM_PROG) 
    },
-   .update = upload_constant_buffer
+   .update = upload_constant_buffer,
+   .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b2c6aa7dba..eeba8e0a17 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -476,7 +476,9 @@ void brw_draw_init( struct brw_context *brw )
 	 struct intel_buffer_object *intel_bo =
 	    intel_buffer_object(brw->vb.upload.vbo[i]);
 
-	 dri_bo_fake_disable_backing_store(intel_bufferobj_buffer(intel_bo),
+	 dri_bo_fake_disable_backing_store(intel_bufferobj_buffer(&brw->intel,
+								  intel_bo,
+								  INTEL_READ),
 					   NULL, NULL);
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index c0da290d5c..b0042a5449 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -42,7 +42,7 @@
 #include "intel_ioctl.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
-
+#include "intel_tex.h"
 
 struct brw_array_state {
    union header_union header;
@@ -68,9 +68,11 @@ struct brw_array_state {
 };
 
 
-static dri_bo *array_buffer( const struct gl_client_array *array )
+static dri_bo *array_buffer( struct intel_context *intel,
+			     const struct gl_client_array *array )
 {
-   return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
+   return intel_bufferobj_buffer(intel, intel_buffer_object(array->BufferObj),
+				 INTEL_READ); /* vertex data is only read here, as for the index buffer */
 }
 
 static GLuint double_types[5] = {
@@ -253,7 +255,7 @@ static void copy_strided_array( GLubyte *dest,
 				GLuint count )
 {
    if (size == stride) 
-      do_memcpy(dest, src, count * size);
+      memcpy(dest, src, count * size);
    else {
       GLuint i,j;
    
@@ -525,7 +527,7 @@ GLboolean brw_upload_vertices( struct brw_context *brw,
       vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
       vbp.vb[i].vb0.bits.vb_index = i;
       vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
-      vbp.vb[i].buffer = array_buffer(input->glarray);
+      vbp.vb[i].buffer = array_buffer(intel, input->glarray);
       vbp.vb[i].max_index = max_index;
    }
 
@@ -555,19 +557,6 @@ GLboolean brw_upload_vertices( struct brw_context *brw,
    return GL_TRUE;
 }
 
-
-static GLuint element_size( GLenum type )
-{
-   switch(type) {
-   case GL_UNSIGNED_INT: return 4;
-   case GL_UNSIGNED_SHORT: return 2;
-   case GL_UNSIGNED_BYTE: return 1;
-   default: assert(0); return 0;
-   }
-}
-
-
-
 void brw_upload_indices( struct brw_context *brw,
 			 const struct _mesa_index_buffer *index_buffer )
 {
@@ -621,7 +610,9 @@ void brw_upload_indices( struct brw_context *brw,
     */
    {
       struct brw_indexbuffer ib;
-      dri_bo *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
+      dri_bo *buffer = intel_bufferobj_buffer(intel,
+					      intel_buffer_object(bufferobj),
+					      INTEL_READ);
 
       memset(&ib, 0, sizeof(ib));
    
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index 3925dcf37b..3c4c60a3ea 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -45,7 +45,8 @@ static GLboolean do_check_fallback(struct brw_context *brw)
 {
    GLcontext *ctx = &brw->intel.ctx;
    GLuint i;
-   
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+
    /* BRW_NEW_METAOPS
     */
    if (brw->metaops.active)
@@ -58,11 +59,15 @@ static GLboolean do_check_fallback(struct brw_context *brw)
 
    /* _NEW_BUFFERS
     */
-   if (ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT &&
-       ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_BACK_LEFT) {
-      DBG("FALLBACK: draw buffer %d: 0x%08x\n",
-	  ctx->DrawBuffer->_ColorDrawBufferMask[0]);
-      return GL_TRUE;
+   /* We can only handle a single draw buffer at the moment, and only as the
+    * first color buffer.
+    */
+   for (i = 0; i < MAX_DRAW_BUFFERS; i++) {
+      if (fb->_NumColorDrawBuffers[i] > (i == 0 ? 1 : 0)) {
+	 DBG("FALLBACK: draw buffer %d: 0x%08x\n",
+	     i, ctx->DrawBuffer->_ColorDrawBufferMask[i]);
+	 return GL_TRUE;
+      }
    }
 
    /* _NEW_RENDERMODE
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 73263a5fff..5c52212a3b 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -119,26 +119,15 @@ static void compile_gs_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->gs.prog_data );
+   dri_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->gs.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_gs_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_GS_PROG], 
-			   key, sizeof(*key),
-			   &brw->gs.prog_data,
-			   &brw->gs.prog_gs_offset);
-}
-
-
 static const GLenum gs_prim[GL_POLYGON+1] = {  
    GL_POINTS,
    GL_LINES,
@@ -187,7 +176,12 @@ static void upload_gs_prog( struct brw_context *brw )
    }
 
    if (brw->gs.prog_active) {
-      if (!search_cache(brw, &key))
+      dri_bo_unreference(brw->gs.prog_bo);
+      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+					 &key, sizeof(key),
+					 NULL, 0,
+					 &brw->gs.prog_data);
+      if (brw->gs.prog_bo == NULL)
 	 compile_gs_prog( brw, &key );
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index 5db4dd4603..6bbf11e253 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -48,7 +48,8 @@ static void upload_gs_unit( struct brw_context *brw )
    if (brw->gs.prog_active) {
       gs.thread0.grf_reg_count =
 	 ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1;
-      gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
+      /* reloc */
+      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
       gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
    }
    else {
@@ -73,11 +74,25 @@ static void upload_gs_unit( struct brw_context *brw )
    gs.thread3.const_urb_entry_read_offset = 0;
    gs.thread3.const_urb_entry_read_length = 0;
    gs.thread3.urb_entry_read_offset = 0;
-   
 
-   brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs );
+   brw->gs.thread0_delta = gs.thread0.grf_reg_count << 1;
+
+   dri_bo_unreference(brw->gs.state_bo);
+   brw->gs.state_bo = brw_cache_data( &brw->cache, BRW_GS_UNIT, &gs,
+				      &brw->gs.prog_bo, 1 );
 }
 
+static void emit_reloc_gs_unit(struct brw_context *brw)
+{
+   if (brw->gs.prog_active) {
+      /* Emit GS program relocation */
+      dri_emit_reloc(brw->gs.state_bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     brw->gs.thread0_delta,
+		     offsetof(struct brw_gs_unit_state, thread0),
+		     brw->gs.prog_bo);
+   }
+}
 
 const struct brw_tracked_state brw_gs_unit = {
    .dirty = {
@@ -86,5 +101,6 @@ const struct brw_tracked_state brw_gs_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_GS_PROG
    },
-   .update = upload_gs_unit
+   .update = upload_gs_unit,
+   .emit_reloc = emit_reloc_gs_unit,
 };
diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c
index 6e030f191e..87da464f1b 100644
--- a/src/mesa/drivers/dri/i965/brw_metaops.c
+++ b/src/mesa/drivers/dri/i965/brw_metaops.c
@@ -376,8 +376,7 @@ static void meta_draw_quad(struct intel_context *intel,
 			   GLfloat x0, GLfloat x1,
 			   GLfloat y0, GLfloat y1, 
 			   GLfloat z,
-			   GLubyte red, GLubyte green,
-			   GLubyte blue, GLubyte alpha,
+			   GLuint color,
 			   GLfloat s0, GLfloat s1,
 			   GLfloat t0, GLfloat t1)
 {
@@ -388,7 +387,6 @@ static void meta_draw_quad(struct intel_context *intel,
    struct gl_client_array *attribs[VERT_ATTRIB_MAX];
    struct _mesa_prim prim[1];
    GLfloat pos[4][3];
-   GLubyte color[4];
 
    ctx->Driver.BufferData(ctx,
 			  GL_ARRAY_BUFFER_ARB,
@@ -413,7 +411,6 @@ static void meta_draw_quad(struct intel_context *intel,
    pos[3][1] = y1;
    pos[3][2] = z;
 
-
    ctx->Driver.BufferSubData(ctx,
 			     GL_ARRAY_BUFFER_ARB,
 			     0,
@@ -421,16 +418,14 @@ static void meta_draw_quad(struct intel_context *intel,
 			     pos,
 			     brw->metaops.vbo);
 
-   color[0] = red;
-   color[1] = green;
-   color[2] = blue;
-   color[3] = alpha;
+   /* ARGB (0xAARRGGBB) -> bytes R,G,B,A in memory (little-endian): swap R/B */
+   color = (color & 0xff00ff00) | ((color >> 16) & 0xff) | ((color & 0xff) << 16);
 
    ctx->Driver.BufferSubData(ctx,
 			     GL_ARRAY_BUFFER_ARB,
 			     sizeof(pos),
 			     sizeof(color),
-			     color,
+			     &color,
 			     brw->metaops.vbo);
 
    /* Ignoring texture coords. 
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 210745c63b..15e4e61244 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -77,7 +77,6 @@ const struct brw_tracked_state brw_blend_constant_color = {
 static void upload_drawing_rect(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
    struct brw_drawrect bdr;
    int x1, y1;
    int x2, y2;
@@ -105,8 +104,8 @@ static void upload_drawing_rect(struct brw_context *brw)
    bdr.ymin = y1;
    bdr.xmax = x2;
    bdr.ymax = y2;
-   bdr.xorg = dPriv->x;
-   bdr.yorg = dPriv->y;
+   bdr.xorg = intel->drawX;
+   bdr.yorg = intel->drawY;
 
    /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted
     * uncached in brw_draw.c:
@@ -128,31 +127,25 @@ const struct brw_tracked_state brw_drawing_rect = {
  * state pointers.
  *
  * The binding table pointers are relative to the surface state base address,
- * which is the BRW_SS_POOL cache buffer.
+ * which is 0.
  */
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
-   struct brw_binding_table_pointers btp;
-   memset(&btp, 0, sizeof(btp));
-
-   btp.header.opcode = CMD_BINDING_TABLE_PTRS;
-   btp.header.length = sizeof(btp)/4 - 2;
-   btp.vs = 0;
-   btp.gs = 0;
-   btp.clp = 0;
-   btp.sf = 0;
-   btp.wm = brw->wm.bind_ss_offset;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &btp);
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+   OUT_BATCH(0); /* vs */
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+   OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_binding_table_pointers = {
-   .dirty = {
-      .mesa = 0,
-      .brw = 0,
-      .cache = CACHE_NEW_SURF_BIND 
-   },
-   .update = upload_binding_table_pointers
+   .update = upload_binding_table_pointers,
+   .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */
 };
 
 
@@ -160,39 +153,33 @@ const struct brw_tracked_state brw_binding_table_pointers = {
  * Upload pointers to the per-stage state.
  *
  * The state pointers in this packet are all relative to the general state
- * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
  */
 static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
-   struct brw_pipelined_state_pointers psp;
-   memset(&psp, 0, sizeof(psp));
-
-   psp.header.opcode = CMD_PIPELINED_STATE_POINTERS;
-   psp.header.length = sizeof(psp)/4 - 2;
-
-   psp.vs.offset = brw->vs.state_gs_offset >> 5;
-   psp.sf.offset = brw->sf.state_gs_offset >> 5;
-   psp.wm.offset = brw->wm.state_gs_offset >> 5;
-   psp.cc.offset = brw->cc.state_gs_offset >> 5;
-
-   /* GS gets turned on and off regularly.  Need to re-emit URB fence
-    * after this occurs.  
-    */
-   if (brw->gs.prog_active) {
-      psp.gs.offset = brw->gs.state_gs_offset >> 5;
-      psp.gs.enable = 1;
-   }
-
-   if (!brw->metaops.active) {
-      psp.clp.offset = brw->clip.state_gs_offset >> 5;
-      psp.clp.enable = 1;
-   }
+   struct intel_context *intel = &brw->intel;
 
+   BEGIN_BATCH(7, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+   OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   if (brw->gs.prog_active)
+      OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+   else
+      OUT_BATCH(0);
+   if (!brw->metaops.active)
+      OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+   else
+      OUT_BATCH(0);
+   OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   ADVANCE_BATCH();
 
-   if (BRW_CACHED_BATCH_STRUCT(brw, &psp))
-      brw->state.dirty.brw |= BRW_NEW_PSP;
+   brw->state.dirty.brw |= BRW_NEW_PSP;
 }
 
+#if 0
+/* Combined into brw_psp_urb_cbs */
 const struct brw_tracked_state brw_pipelined_state_pointers = {
    .dirty = {
       .mesa = 0,
@@ -206,7 +193,9 @@ const struct brw_tracked_state brw_pipelined_state_pointers = {
 		CACHE_NEW_CC_UNIT)
    },
    .update = upload_pipelined_state_pointers
+   .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */
 };
+#endif
 
 static void upload_psp_urb_cbs(struct brw_context *brw )
 {
@@ -228,7 +217,8 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
 		CACHE_NEW_WM_UNIT | 
 		CACHE_NEW_CC_UNIT)
    },
-   .update = upload_psp_urb_cbs
+   .update = upload_psp_urb_cbs,
+   .always_update = GL_TRUE, /* psp has relocations. */
 };
 
 /**
@@ -242,37 +232,48 @@ static void upload_depthbuffer(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
 
-   unsigned int format;
-
-   switch (region->cpp) {
-   case 2:
-      format = BRW_DEPTHFORMAT_D16_UNORM;
-      break;
-   case 4:
-      if (intel->depth_buffer_is_float)
-	 format = BRW_DEPTHFORMAT_D32_FLOAT;
-      else
-	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-      break;
-   default:
-      assert(0);
-      return;
+   if (region == NULL) {
+      BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+		(BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      unsigned int format;
+
+      switch (region->cpp) {
+      case 2:
+	 format = BRW_DEPTHFORMAT_D16_UNORM;
+	 break;
+      case 4:
+	 if (intel->depth_buffer_is_float)
+	    format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 else
+	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 break;
+      default:
+	 assert(0);
+	 return;
+      }
+
+      BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+      OUT_BATCH(((region->pitch * region->cpp) - 1) |
+		(format << 18) |
+		(BRW_TILEWALK_YMAJOR << 26) |
+		(region->tiled << 27) |
+		(BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+		DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+		((region->pitch - 1) << 6) |
+		((region->height - 1) << 19));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
    }
-
-   BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
-   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
-   OUT_BATCH(((region->pitch * region->cpp) - 1) |
-	     (format << 18) |
-	     (BRW_TILEWALK_YMAJOR << 26) |
-	     (region->tiled << 27) |
-	     (BRW_SURFACE_2D << 29));
-   OUT_RELOC(region->buffer,
-	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
-   OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
-	     ((region->pitch - 1) << 6) |
-	     ((region->height - 1) << 19));
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_depthbuffer = {
@@ -491,20 +492,19 @@ static void upload_state_base_address( struct brw_context *brw )
     */
    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
    OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
-   OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
-	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-	     1); /* General state base address */
-   OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
-	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-	     1); /* Surface state base address */
+   OUT_BATCH(1); /* General state base address */
+   OUT_BATCH(1); /* Surface state base address */
    OUT_BATCH(1); /* Indirect object base address */
    OUT_BATCH(1); /* General state upper bound */
    OUT_BATCH(1); /* Indirect object upper bound */
    ADVANCE_BATCH();
 }
 
-
 const struct brw_tracked_state brw_state_base_address = {
-   .always_update = GL_TRUE,
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0,
+   },
    .update = upload_state_base_address
 };
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 738ceb0552..6c2f174bf5 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -87,18 +87,18 @@ static void compile_sf_prog( struct brw_context *brw,
    switch (key->primitive) {
    case SF_TRIANGLES:
       c.nr_verts = 3;
-      brw_emit_tri_setup( &c );
+      brw_emit_tri_setup( &c, GL_TRUE );
       break;
    case SF_LINES:
       c.nr_verts = 2;
-      brw_emit_line_setup( &c );
+      brw_emit_line_setup( &c, GL_TRUE );
       break;
    case SF_POINTS:
       c.nr_verts = 1;
       if (key->do_point_sprite)
-	  brw_emit_point_sprite_setup( &c );
+	  brw_emit_point_sprite_setup( &c, GL_TRUE );
       else
-	  brw_emit_point_setup( &c );
+	  brw_emit_point_setup( &c, GL_TRUE );
       break;
    case SF_UNFILLED_TRIS:
       c.nr_verts = 3;
@@ -116,26 +116,15 @@ static void compile_sf_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->sf.prog_data );
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->sf.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_sf_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_SF_PROG], 
-			   key, sizeof(*key),
-			   &brw->sf.prog_data,
-			   &brw->sf.prog_gs_offset);
-}
-
-
 /* Calculate interpolants for triangle and line rasterization.
  */
 static void upload_sf_prog( struct brw_context *brw )
@@ -180,8 +169,12 @@ static void upload_sf_prog( struct brw_context *brw )
    if (key.do_twoside_color)
       key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
 
-
-   if (!search_cache(brw, &key))
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->sf.prog_data);
+   if (brw->sf.prog_bo == NULL)
       compile_sf_prog( brw, &key );
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index 385f1eea13..1c0fb70fe0 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -103,10 +103,10 @@ struct brw_sf_compile {
 };
 
  
-void brw_emit_tri_setup( struct brw_sf_compile *c );
-void brw_emit_line_setup( struct brw_sf_compile *c );
-void brw_emit_point_setup( struct brw_sf_compile *c );
-void brw_emit_point_sprite_setup( struct brw_sf_compile *c );
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_anyprim_setup( struct brw_sf_compile *c );
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index 5e86e428fa..080a02b730 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -343,13 +343,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
 
 
 
-void brw_emit_tri_setup( struct brw_sf_compile *c )
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
 {
    struct brw_compile *p = &c->func;
    GLuint i;
 
    c->nr_verts = 3;
-   alloc_regs(c);
+
+   if (allocate)
+      alloc_regs(c);
+
    invert_det(c);
    copy_z_inv_w(c);
 
@@ -428,14 +431,17 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
 
 
 
-void brw_emit_line_setup( struct brw_sf_compile *c )
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
 {
    struct brw_compile *p = &c->func;
    GLuint i;
 
 
    c->nr_verts = 2;
-   alloc_regs(c);
+
+   if (allocate)
+      alloc_regs(c);
+
    invert_det(c);
    copy_z_inv_w(c);
 
@@ -497,13 +503,16 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
    } 
 }
 
-void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
 {
    struct brw_compile *p = &c->func;
    GLuint i;
 
    c->nr_verts = 1;
-   alloc_regs(c);
+
+   if (allocate)
+      alloc_regs(c);
+
    copy_z_inv_w(c);
    for (i = 0; i < c->nr_setup_regs; i++)
    {
@@ -581,13 +590,16 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
 /* Points setup - several simplifications as all attributes are
  * constant across the face of the point (point sprites excluded!)
  */
-void brw_emit_point_setup( struct brw_sf_compile *c )
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
 {
    struct brw_compile *p = &c->func;
    GLuint i;
 
    c->nr_verts = 1;
-   alloc_regs(c);
+   
+   if (allocate)
+      alloc_regs(c);
+
    copy_z_inv_w(c);
 
    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
@@ -645,7 +657,10 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    struct brw_reg primmask;
    struct brw_instruction *jmp;
    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+   
+   GLuint saveflag;
 
+   c->nr_verts = 3;
    alloc_regs(c);
 
    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
@@ -663,10 +678,15 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
    jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
    {
+      saveflag = p->flag_value;
       brw_push_insn_state(p); 
-      brw_emit_tri_setup( c );
+      brw_emit_tri_setup( c, GL_FALSE );
       brw_pop_insn_state(p);
-      /* note - thread killed in subroutine */
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine, so must
+       * restore the flag which is changed when building
+       * the subroutine. fix #13240
+       */
    }
    brw_land_fwd_jump(p, jmp);
 
@@ -679,9 +699,11 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
    jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
    {
+      saveflag = p->flag_value;
       brw_push_insn_state(p); 
-      brw_emit_line_setup( c );
+      brw_emit_line_setup( c, GL_FALSE );
       brw_pop_insn_state(p);
+      p->flag_value = saveflag;
       /* note - thread killed in subroutine */
    }
    brw_land_fwd_jump(p, jmp); 
@@ -690,13 +712,15 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
    jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
    {
+      saveflag = p->flag_value;
       brw_push_insn_state(p); 
-      brw_emit_point_sprite_setup( c );
+      brw_emit_point_sprite_setup( c, GL_FALSE );
       brw_pop_insn_state(p);
+      p->flag_value = saveflag;
    }
    brw_land_fwd_jump(p, jmp); 
 
-   brw_emit_point_setup( c );
+   brw_emit_point_setup( c, GL_FALSE );
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 2257916aae..e1304d3144 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -35,69 +35,69 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "macros.h"
+#include "intel_fbo.h"
 
 static void upload_sf_vp(struct brw_context *brw)
 {
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    struct brw_sf_viewport sfv;
+   struct intel_renderbuffer *irb =
+      intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
+   GLfloat y_scale, y_bias;
 
    memset(&sfv, 0, sizeof(sfv));
-   
-   if (brw->intel.driDrawable) 
-   {
-      /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
-
-      if (!brw->metaops.active) {
-	 const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m;
-	 
-	 sfv.viewport.m00 =   v[MAT_SX];
-	 sfv.viewport.m11 = - v[MAT_SY];
-	 sfv.viewport.m22 =   v[MAT_SZ] * brw->intel.depth_scale;
-	 sfv.viewport.m30 =   v[MAT_TX];
-	 sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h;
-	 sfv.viewport.m32 =   v[MAT_TZ] * brw->intel.depth_scale;
-      }
-      else {
-	 sfv.viewport.m00 =   1;
-	 sfv.viewport.m11 = - 1;
-	 sfv.viewport.m22 =   1;
-	 sfv.viewport.m30 =   0;
-	 sfv.viewport.m31 =   brw->intel.driDrawable->h;
-	 sfv.viewport.m32 =   0;
+
+   if (ctx->DrawBuffer->Name) {
+      /* User-created FBO */
+      if (irb && !irb->RenderToTexture) {
+	 y_scale = -1.0;
+	 y_bias = ctx->DrawBuffer->Height;
+      } else {
+	 y_scale = 1.0;
+	 y_bias = 0;
       }
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
    }
 
-   /* XXX: what state for this? */
-   if (brw->intel.driDrawable)
-   {
-      intelScreenPrivate *screen = brw->intel.intelScreen;
-      /* _NEW_SCISSOR */
-      GLint x = brw->attribs.Scissor->X;
-      GLint y = brw->attribs.Scissor->Y;
-      GLuint w = brw->attribs.Scissor->Width;
-      GLuint h = brw->attribs.Scissor->Height;
-
-      GLint x1 = x;
-      GLint y1 = brw->intel.driDrawable->h - (y + h);
-      GLint x2 = x + w - 1;
-      GLint y2 = y1 + h - 1;
-
-      if (x1 < 0) x1 = 0;
-      if (y1 < 0) y1 = 0;
-      if (x2 < 0) x2 = 0;
-      if (y2 < 0) y2 = 0;
-
-      if (x2 >= screen->width) x2 = screen->width-1;
-      if (y2 >= screen->height) y2 = screen->height-1;
-      if (x1 >= screen->width) x1 = screen->width-1;
-      if (y1 >= screen->height) y1 = screen->height-1;
-      
-      sfv.scissor.xmin = x1;
-      sfv.scissor.xmax = x2;
-      sfv.scissor.ymin = y1;
-      sfv.scissor.ymax = y2;
+   /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
+
+   if (!brw->metaops.active) {
+      const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+      sfv.viewport.m00 = v[MAT_SX];
+      sfv.viewport.m11 = v[MAT_SY] * y_scale;
+      sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+      sfv.viewport.m30 = v[MAT_TX];
+      sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+      sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+   } else {
+      sfv.viewport.m00 =   1;
+      sfv.viewport.m11 = - 1;
+      sfv.viewport.m22 =   1;
+      sfv.viewport.m30 =   0;
+      sfv.viewport.m31 =   ctx->DrawBuffer->Height;
+      sfv.viewport.m32 =   0;
    }
 
-   brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
+   /* _NEW_SCISSOR */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+   sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+   sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+
+   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
 }
 
 const struct brw_tracked_state brw_sf_vp = {
@@ -116,10 +116,11 @@ static void upload_sf_unit( struct brw_context *brw )
 {
    struct brw_sf_unit_state sf;
    memset(&sf, 0, sizeof(sf));
+   dri_bo *reloc_bufs[2];
 
    /* CACHE_NEW_SF_PROG */
    sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
-   sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
    sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
@@ -138,7 +139,7 @@ static void upload_sf_unit( struct brw_context *brw )
       sf.thread4.stats_enable = 1; 
 
    /* CACHE_NEW_SF_VP */
-   sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
    
    sf.sf5.viewport_transform = 1;
    
@@ -202,9 +203,33 @@ static void upload_sf_unit( struct brw_context *brw )
    sf.sf6.dest_org_vbias = 0x8;
    sf.sf6.dest_org_hbias = 0x8;
 
-   brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf );
+   reloc_bufs[0] = brw->sf.prog_bo;
+   reloc_bufs[1] = brw->sf.vp_bo;
+
+   brw->sf.thread0_delta = sf.thread0.grf_reg_count << 1;
+   brw->sf.sf5_delta = sf.sf5.front_winding | (sf.sf5.viewport_transform << 1);
+
+   dri_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_cache_data( &brw->cache, BRW_SF_UNIT, &sf,
+				      reloc_bufs, 2 );
 }
 
+static void emit_reloc_sf_unit(struct brw_context *brw)
+{
+   /* Emit SF program relocation */
+   dri_emit_reloc(brw->sf.state_bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  brw->sf.thread0_delta,
+		  offsetof(struct brw_sf_unit_state, thread0),
+		  brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   dri_emit_reloc(brw->sf.state_bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  brw->sf.sf5_delta,
+		  offsetof(struct brw_sf_unit_state, sf5),
+		  brw->sf.vp_bo);
+}
 
 const struct brw_tracked_state brw_sf_unit = {
    .dirty = {
@@ -217,7 +242,6 @@ const struct brw_tracked_state brw_sf_unit = {
       .cache = (CACHE_NEW_SF_VP |
 		CACHE_NEW_SF_PROG)
    },
-   .update = upload_sf_unit
+   .update = upload_sf_unit,
+   .emit_reloc = emit_reloc_sf_unit,
 };
-
-
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index ef2409df5a..f0a740f456 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -83,35 +83,42 @@ const struct brw_tracked_state brw_clear_batch_cache;
 /***********************************************************************
  * brw_state_cache.c
  */
-GLuint brw_cache_data(struct brw_cache *cache,
-		      const void *data );
-
-GLuint brw_cache_data_sz(struct brw_cache *cache,
-			 const void *data,
-			 GLuint data_sz);
-
-GLuint brw_upload_cache( struct brw_cache *cache,
-			 const void *key,
-			 GLuint key_sz,
-			 const void *data,
-			 GLuint data_sz,
-			 const void *aux,
-			 void *aux_return );
-
-GLboolean brw_search_cache( struct brw_cache *cache,
-			    const void *key,
-			    GLuint key_size,
-			    void *aux_return,
-			    GLuint *offset_return);
-
-void brw_init_caches( struct brw_context *brw );
-void brw_destroy_caches( struct brw_context *brw );
-
-static inline dri_bo *brw_cache_buffer(struct brw_context *brw,
-				       enum brw_cache_id id)
-{
-   return brw->cache[id].pool->buffer;
-}
+dri_bo *brw_cache_data(struct brw_cache *cache,
+		       enum brw_cache_id cache_id,
+		       const void *data,
+		       dri_bo **reloc_bufs,
+		       GLuint nr_reloc_bufs);
+
+dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *data,
+			  GLuint data_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs);
+
+dri_bo *brw_upload_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_sz,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  const void *data,
+			  GLuint data_sz,
+			  const void *aux,
+			  void *aux_return );
+
+dri_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  void *aux_return);
+void brw_clear_cache( struct brw_context *brw );
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_cache( struct brw_context *brw );
+void brw_destroy_cache( struct brw_context *brw );
 
 /***********************************************************************
  * brw_state_batch.c
@@ -122,31 +129,7 @@ static inline dri_bo *brw_cache_buffer(struct brw_context *brw,
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
 				   const void *data,
 				   GLuint sz );
-
 void brw_destroy_batch_cache( struct brw_context *brw );
-
-
-/***********************************************************************
- * brw_state_pool.c
- */
-void brw_init_pools( struct brw_context *brw );
-void brw_destroy_pools( struct brw_context *brw );
-
-GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
-			  GLuint size,
-			  GLuint alignment,
-			  GLuint *offset_return);
-
-void brw_pool_fence( struct brw_context *brw,
-		     struct brw_mem_pool *pool,
-		     GLuint fence );
-
-
-void brw_pool_check_wrap( struct brw_context *brw,
-			  struct brw_mem_pool *pool );
-
-void brw_clear_all_caches( struct brw_context *brw );
-void brw_invalidate_pools( struct brw_context *brw );
 void brw_clear_batch_cache_flush( struct brw_context *brw );
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index eabda257d3..34be101273 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -91,11 +91,6 @@ static void clear_batch_cache( struct brw_context *brw )
    }
 
    brw->cached_batch_items = NULL;
-
-
-   brw_clear_all_caches(brw);
-
-   brw_invalidate_pools(brw);
 }
 
 void brw_clear_batch_cache_flush( struct brw_context *brw )
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 618e445546..d614316ab6 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -28,7 +28,33 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-      
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965.  The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table based on a CRC of the key data.
+ * The cache_id and relocation target buffers associated with the state
+ * buffer are included as auxiliary key data, but are not part of the hash
+ * value (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match.  The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
 
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
@@ -43,15 +69,6 @@
 #include "brw_sf.h"
 #include "brw_gs.h"
 
-
-/***********************************************************************
- * Check cache for uploaded version of struct, else upload new one.
- * Fail when memory is exhausted.
- *
- * XXX: FIXME: Currently search is so slow it would be quicker to
- * regenerate the data every time...
- */
-
 static GLuint hash_key( const void *key, GLuint key_size )
 {
    GLuint *ikey = (GLuint *)key;
@@ -67,17 +84,34 @@ static GLuint hash_key( const void *key, GLuint key_size )
    return hash;
 }
 
-static struct brw_cache_item *search_cache( struct brw_cache *cache,
-					     GLuint hash,
-					     const void *key,
-					     GLuint key_size)
+/**
+ * Marks a new buffer as being chosen for the given cache id.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+		  dri_bo *bo)
+{
+   dri_bo_unreference(cache->last_bo[cache_id]);
+   cache->last_bo[cache_id] = bo;
+   dri_bo_reference(cache->last_bo[cache_id]);
+   cache->brw->state.dirty.cache |= 1 << cache_id;
+}
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+	     GLuint hash, const void *key, GLuint key_size,
+	     dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
 {
    struct brw_cache_item *c;
 
    for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (c->hash == hash && 
+      if (c->cache_id == cache_id &&
+	  c->hash == hash &&
 	  c->key_size == key_size &&
-	  memcmp(c->key, key, key_size) == 0)
+	  memcmp(c->key, key, key_size) == 0 &&
+	  c->nr_reloc_bufs == nr_reloc_bufs &&
+	  memcmp(c->reloc_bufs, reloc_bufs,
+		 nr_reloc_bufs * sizeof(dri_bo *)) == 0)
 	 return c;
    }
 
@@ -92,8 +126,7 @@ static void rehash( struct brw_cache *cache )
    GLuint size, i;
 
    size = cache->size * 3;
-   items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items));
-   _mesa_memset(items, 0, size * sizeof(*items));
+   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
 
    for (i = 0; i < cache->size; i++)
       for (c = cache->items[i]; c; c = next) {
@@ -107,116 +140,156 @@ static void rehash( struct brw_cache *cache )
    cache->size = size;
 }
 
-
-GLboolean brw_search_cache( struct brw_cache *cache,
-			    const void *key,
-			    GLuint key_size,
-			    void *aux_return,
-			    GLuint *offset_return)
+/**
+ * Returns the buffer object matching cache_id and key, or NULL.
+ */
+dri_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+			  void *aux_return )
 {
    struct brw_cache_item *item;
-   GLuint addr = 0;
    GLuint hash = hash_key(key, key_size);
 
-   item = search_cache(cache, hash, key, key_size);
+   item = search_cache(cache, cache_id, hash, key, key_size,
+		       reloc_bufs, nr_reloc_bufs);
 
-   if (item) {
-      if (aux_return) 
-	 *(void **)aux_return = (void *)((char *)item->key + item->key_size);
-      
-      *offset_return = addr = item->offset;
-   }    
-    
-   if (item == NULL || addr != cache->last_addr) {
-      cache->brw->state.dirty.cache |= 1<<cache->id;
-      cache->last_addr = addr;
-   }
-   
-   return item != NULL;
+   if (item == NULL)
+      return NULL;
+
+   if (aux_return)
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   dri_bo_reference(item->bo);
+   return item->bo;
 }
 
-GLuint brw_upload_cache( struct brw_cache *cache,
-			 const void *key,
-			 GLuint key_size,
-			 const void *data,
-			 GLuint data_size,
-			 const void *aux,
-			 void *aux_return )
-{   
-   GLuint offset;
+dri_bo *
+brw_upload_cache( struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *key,
+		  GLuint key_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs,
+		  const void *data,
+		  GLuint data_size,
+		  const void *aux,
+		  void *aux_return )
+{
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    GLuint hash = hash_key(key, key_size);
-   void *tmp = _mesa_malloc(key_size + cache->aux_size);
-   
-   if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) {
-      /* Should not be possible: 
-       */
-      _mesa_printf("brw_pool_alloc failed\n");
-      exit(1);
-   }
+   GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+   GLuint aux_size = cache->aux_size[cache_id];
+   void *tmp;
+   dri_bo *bo;
+   int i;
+
+   /* Create the buffer object to contain the data */
+   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
+		     cache->name[cache_id], data_size, 1 << 6,
+		     DRM_BO_FLAG_MEM_LOCAL |
+		     DRM_BO_FLAG_CACHED |
+		     DRM_BO_FLAG_CACHED_MAPPED);
+
+
+   /* Set up the memory containing the key, aux_data, and reloc_bufs */
+   tmp = _mesa_malloc(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
+   memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+   memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   for (i = 0; i < nr_reloc_bufs; i++) {
+      if (reloc_bufs[i] != NULL)
+	 dri_bo_reference(reloc_bufs[i]);
+   }
 
-   if (cache->aux_size)
-      memcpy(tmp+key_size, aux, cache->aux_size);
-	 
+   item->cache_id = cache_id;
    item->key = tmp;
    item->hash = hash;
    item->key_size = key_size;
-   item->offset = offset;
+   item->reloc_bufs = tmp + key_size + aux_size;
+   item->nr_reloc_bufs = nr_reloc_bufs;
+
+   item->bo = bo;
+   dri_bo_reference(bo);
    item->data_size = data_size;
 
-   if (++cache->n_items > cache->size * 1.5)
+   if (cache->n_items > cache->size * 1.5)
       rehash(cache);
-   
+
    hash %= cache->size;
    item->next = cache->items[hash];
    cache->items[hash] = item;
-      
+   cache->n_items++;
+
    if (aux_return) {
-      assert(cache->aux_size);
+      assert(cache->aux_size[cache_id]);
       *(void **)aux_return = (void *)((char *)item->key + item->key_size);
    }
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n",
-		   cache->name,
-		   data_size, 
-		   cache->pool->buffer,
-		   offset);
+      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+		   cache->name[cache_id],
+		   data_size, cache_id);
 
-   /* Copy data to the buffer:
-    */
-   dri_bo_subdata(cache->pool->buffer, offset, data_size, data);
+   /* Copy data to the buffer */
+   dri_bo_subdata(bo, 0, data_size, data);
 
-   cache->brw->state.dirty.cache |= 1<<cache->id;
-   cache->last_addr = offset;
+   update_cache_last(cache, cache_id, bo);
 
-   return offset;
+   return bo;
 }
 
 /* This doesn't really work with aux data.  Use search/upload instead
  */
-GLuint brw_cache_data_sz(struct brw_cache *cache,
-			 const void *data,
-			 GLuint data_size)
+dri_bo *
+brw_cache_data_sz(struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *data,
+		  GLuint data_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs)
 {
-   GLuint addr;
+   dri_bo *bo;
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(data, data_size);
 
-   if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
-      addr = brw_upload_cache(cache, 
-			      data, data_size, 
-			      data, data_size, 
-			      NULL, NULL);
+   item = search_cache(cache, cache_id, hash, data, data_size, reloc_bufs, nr_reloc_bufs);
+   if (item) { /* hit: flag this cache_id dirty, as brw_search_cache() does */
+      update_cache_last(cache, cache_id, item->bo);
+      dri_bo_reference(item->bo);
+      return item->bo;
    }
 
-   return addr;
+   bo = brw_upload_cache(cache, cache_id,
+			 data, data_size,
+			 reloc_bufs, nr_reloc_bufs,
+			 data, data_size,
+			 NULL, NULL);
+
+   return bo;
 }
 
-GLuint brw_cache_data(struct brw_cache *cache,
-		      const void *data)
+/**
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ */
+dri_bo *
+brw_cache_data(struct brw_cache *cache,
+	       enum brw_cache_id cache_id,
+	       const void *data,
+	       dri_bo **reloc_bufs,
+	       GLuint nr_reloc_bufs)
 {
-   return brw_cache_data_sz(cache, data, cache->key_size);
+   return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+			    reloc_bufs, nr_reloc_bufs);
 }
 
 enum pool_type {
@@ -224,18 +297,25 @@ enum pool_type {
    DW_GENERAL_STATE
 };
 
-static void brw_init_cache( struct brw_context *brw, 
-			    const char *name,
-			    GLuint id,
-			    GLuint key_size,
-			    GLuint aux_size,
-			    enum pool_type pool_type)
+static void
+brw_init_cache_id( struct brw_context *brw,
+		const char *name,
+		enum brw_cache_id id,
+		GLuint key_size,
+		GLuint aux_size)
 {
-   struct brw_cache *cache = &brw->cache[id];
+   struct brw_cache *cache = &brw->cache;
+
+   cache->name[id] = strdup(name);
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
+
+void brw_init_cache( struct brw_context *brw )
+{
+   struct brw_cache *cache = &brw->cache;
+
    cache->brw = brw;
-   cache->id = id;
-   cache->name = name;
-   cache->items = NULL;
 
    cache->size = 7;
    cache->n_items = 0;
@@ -243,137 +323,107 @@ static void brw_init_cache( struct brw_context *brw,
       _mesa_calloc(cache->size * 
 		   sizeof(struct brw_cache_item));
 
-
-   cache->key_size = key_size;
-   cache->aux_size = aux_size;
-   switch (pool_type) {
-   case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
-   case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
-   default: assert(0); break;
-   }
-}
-
-void brw_init_caches( struct brw_context *brw )
-{
-
-   brw_init_cache(brw,
-		  "CC_VP",
-		  BRW_CC_VP,
-		  sizeof(struct brw_cc_viewport),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "CC_UNIT",
-		  BRW_CC_UNIT,
-		  sizeof(struct brw_cc_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "WM_PROG",
-		  BRW_WM_PROG,
-		  sizeof(struct brw_wm_prog_key),
-		  sizeof(struct brw_wm_prog_data),
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SAMPLER_DEFAULT_COLOR",
-		  BRW_SAMPLER_DEFAULT_COLOR,
-		  sizeof(struct brw_sampler_default_color),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SAMPLER",
-		  BRW_SAMPLER,
-		  0,		/* variable key/data size */
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "WM_UNIT",
-		  BRW_WM_UNIT,
-		  sizeof(struct brw_wm_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SF_PROG",
-		  BRW_SF_PROG,
-		  sizeof(struct brw_sf_prog_key),
-		  sizeof(struct brw_sf_prog_data),
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SF_VP",
-		  BRW_SF_VP,
-		  sizeof(struct brw_sf_viewport),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SF_UNIT",
-		  BRW_SF_UNIT,
-		  sizeof(struct brw_sf_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "VS_UNIT",
-		  BRW_VS_UNIT,
-		  sizeof(struct brw_vs_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "VS_PROG",
-		  BRW_VS_PROG,
-		  sizeof(struct brw_vs_prog_key),
-		  sizeof(struct brw_vs_prog_data),
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "CLIP_UNIT",
-		  BRW_CLIP_UNIT,
-		  sizeof(struct brw_clip_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "CLIP_PROG",
-		  BRW_CLIP_PROG,
-		  sizeof(struct brw_clip_prog_key),
-		  sizeof(struct brw_clip_prog_data),
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "GS_UNIT",
-		  BRW_GS_UNIT,
-		  sizeof(struct brw_gs_unit_state),
-		  0,
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "GS_PROG",
-		  BRW_GS_PROG,
-		  sizeof(struct brw_gs_prog_key),
-		  sizeof(struct brw_gs_prog_data),
-		  DW_GENERAL_STATE);
-
-   brw_init_cache(brw,
-		  "SS_SURFACE",
-		  BRW_SS_SURFACE,
-		  sizeof(struct brw_surface_state),
-		  0,
-		  DW_SURFACE_STATE);
-
-   brw_init_cache(brw,
-		  "SS_SURF_BIND",
-		  BRW_SS_SURF_BIND,
-		  sizeof(struct brw_surface_binding_table),
-		  0,
-		  DW_SURFACE_STATE);
+   brw_init_cache_id(brw,
+		     "CC_VP",
+		     BRW_CC_VP,
+		     sizeof(struct brw_cc_viewport),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "CC_UNIT",
+		     BRW_CC_UNIT,
+		     sizeof(struct brw_cc_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "WM_PROG",
+		     BRW_WM_PROG,
+		     sizeof(struct brw_wm_prog_key),
+		     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SAMPLER_DEFAULT_COLOR",
+		     BRW_SAMPLER_DEFAULT_COLOR,
+		     sizeof(struct brw_sampler_default_color),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SAMPLER",
+		     BRW_SAMPLER,
+		     0,		/* variable key/data size */
+		     0);
+
+   brw_init_cache_id(brw,
+		     "WM_UNIT",
+		     BRW_WM_UNIT,
+		     sizeof(struct brw_wm_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SF_PROG",
+		     BRW_SF_PROG,
+		     sizeof(struct brw_sf_prog_key),
+		     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SF_VP",
+		     BRW_SF_VP,
+		     sizeof(struct brw_sf_viewport),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SF_UNIT",
+		     BRW_SF_UNIT,
+		     sizeof(struct brw_sf_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "VS_UNIT",
+		     BRW_VS_UNIT,
+		     sizeof(struct brw_vs_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "VS_PROG",
+		     BRW_VS_PROG,
+		     sizeof(struct brw_vs_prog_key),
+		     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(brw,
+		     "CLIP_UNIT",
+		     BRW_CLIP_UNIT,
+		     sizeof(struct brw_clip_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "CLIP_PROG",
+		     BRW_CLIP_PROG,
+		     sizeof(struct brw_clip_prog_key),
+		     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(brw,
+		     "GS_UNIT",
+		     BRW_GS_UNIT,
+		     sizeof(struct brw_gs_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "GS_PROG",
+		     BRW_GS_PROG,
+		     sizeof(struct brw_gs_prog_key),
+		     sizeof(struct brw_gs_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SS_SURFACE",
+		     BRW_SS_SURFACE,
+		     sizeof(struct brw_surface_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SS_SURF_BIND",
+		     BRW_SS_SURF_BIND,
+		     0,
+		     0);
 }
 
 
@@ -399,7 +449,12 @@ static void clear_cache( struct brw_cache *cache )
 
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
+	 int j;
+
 	 next = c->next;
+	 for (j = 0; j < c->nr_reloc_bufs; j++)
+	    dri_bo_unreference(c->reloc_bufs[j]);
+	 dri_bo_unreference(c->bo);
 	 free((void *)c->key);
 	 free(c);
       }
@@ -409,15 +464,12 @@ static void clear_cache( struct brw_cache *cache )
    cache->n_items = 0;
 }
 
-void brw_clear_all_caches( struct brw_context *brw )
+void brw_clear_cache( struct brw_context *brw )
 {
-   GLint i;
-
    if (INTEL_DEBUG & DEBUG_STATE)
       _mesa_printf("%s\n", __FUNCTION__);
 
-   for (i = 0; i < BRW_MAX_CACHE; i++)
-      clear_cache(&brw->cache[i]);      
+   clear_cache(&brw->cache);
 
    if (brw->curbe.last_buf) {
       _mesa_free(brw->curbe.last_buf);
@@ -429,14 +481,24 @@ void brw_clear_all_caches( struct brw_context *brw )
    brw->state.dirty.cache |= ~0;
 }
 
+void brw_state_cache_check_size( struct brw_context *brw )
+{
+   /* un-tuned guess.  We've got around 20 state objects for a total of around
+    * 32k, so 1000 of them is around 1.5MB.
+    */
+   if (brw->cache.n_items > 1000)
+      brw_clear_cache(brw);
+}
 
-
-
-
-void brw_destroy_caches( struct brw_context *brw )
+void brw_destroy_cache( struct brw_context *brw )
 {
    GLuint i;
 
+   clear_cache(&brw->cache);
    for (i = 0; i < BRW_MAX_CACHE; i++)
-      clear_cache(&brw->cache[i]);      
+      free(brw->cache.name[i]);
+
+   free(brw->cache.items);
+   brw->cache.items = NULL;
+   brw->cache.size = 0;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 1e8fc97275..9eb27cc267 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -41,13 +41,13 @@
  * \param index Index of the DWORD being output.
  */
 static void
-state_out(char *name, uint32_t *data, uint32_t hw_offset, int index,
+state_out(const char *name, void *data, uint32_t hw_offset, int index,
 	  char *fmt, ...)
 {
     va_list va;
 
     fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
-	    name, hw_offset + index * 4, data[index]);
+	    name, hw_offset + index * 4, ((uint32_t *)data)[index]);
     va_start(va, fmt);
     vfprintf(stderr, fmt, va);
     va_end(va);
@@ -55,43 +55,115 @@ state_out(char *name, uint32_t *data, uint32_t hw_offset, int index,
 
 /** Generic, undecoded state buffer debug printout */
 static void
-state_struct_out(char *name, dri_bo *buffer, unsigned int pool_offset,
-		 unsigned int state_size)
+state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
 {
    int i;
-   uint32_t *state;
 
-   state = buffer->virtual + pool_offset;
+   if (buffer == NULL)
+      return;
+
+   dri_bo_map(buffer, GL_FALSE);
    for (i = 0; i < state_size / 4; i++) {
-      state_out(name, state, buffer->offset + pool_offset, i,
+      state_out(name, buffer->virtual, buffer->offset, i,
 		"dword %d\n", i);
    }
+   dri_bo_unmap(buffer);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    /* Names indexed by surface type value; NULL marks values with no name. */
+    static const char *const names[] = {
+	"1D", "2D", "3D", "CUBE", "BUFFER", NULL, NULL, "NULL"
+    };
+
+    if (surfacetype < sizeof(names) / sizeof(names[0]) &&
+	names[surfacetype] != NULL)
+	return names[surfacetype];
+    return "unknown";
 }
 
-static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer)
+static void dump_wm_surface_state(struct brw_context *brw)
 {
    int i;
 
    for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      unsigned int surfoff = ss_buffer->offset + brw->wm.bind.surf_ss_offset[i];
-      struct brw_surface_state *surf =
-	 (struct brw_surface_state *)(ss_buffer->virtual +
-				      brw->wm.bind.surf_ss_offset[i]);
-      uint32_t *surfvals = (uint32_t *)surf;
+      dri_bo *surf_bo = brw->wm.surf_bo[i];
+      unsigned int surfoff;
+      struct brw_surface_state *surf;
       char name[20];
 
+      if (surf_bo == NULL) /* no buffer to map for this slot; skip it */
+	 continue;
+      dri_bo_map(surf_bo, GL_FALSE);
+      surfoff = surf_bo->offset;
+      surf = (struct brw_surface_state *)(surf_bo->virtual);
       sprintf(name, "WM SS%d", i);
-      state_out(name, surfvals, surfoff, 0, "\n");
-      state_out(name, surfvals, surfoff, 1, "offset\n");
-      state_out(name, surfvals, surfoff, 2, "%dx%d size, %d mips\n",
+      state_out(name, surf, surfoff, 0, "%s\n",
+		get_965_surfacetype(surf->ss0.surface_type));
+      state_out(name, surf, surfoff, 1, "offset\n");
+      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
 		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
-      state_out(name, surfvals, surfoff, 3, "pitch %d, %stiled\n",
+      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
 		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
-      state_out(name, surfvals, surfoff, 4, "mip base %d\n",
+      state_out(name, surf, surfoff, 4, "mip base %d\n",
 		surf->ss4.min_lod);
+      dri_bo_unmap(surf_bo);
    }
 }
 
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+   const char *name = "SF VP";
+   struct brw_sf_viewport *vp;
+   uint32_t vp_off;
+
+   if (brw->sf.vp_bo == NULL)
+      return;
+
+   dri_bo_map(brw->sf.vp_bo, GL_FALSE);
+
+   vp = brw->sf.vp_bo->virtual;
+   vp_off = brw->sf.vp_bo->offset;
+
+   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
+   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+
+   state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
+	     vp->scissor.xmin, vp->scissor.ymin);
+   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+	     vp->scissor.xmax, vp->scissor.ymax);
+
+   dri_bo_unmap(brw->sf.vp_bo);
+}
+
+static void brw_debug_prog(const char *name, dri_bo *prog)
+{
+   unsigned int i;
+   uint32_t *data;
+
+   if (prog == NULL)
+      return;
+
+   dri_bo_map(prog, GL_FALSE);
+
+   data = prog->virtual;
+
+   for (i = 0; i < prog->size / 4 / 4; i++) {
+      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+	      name, (unsigned int)prog->offset + i * 4 * 4,
+	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+   }
+
+   dri_bo_unmap(prog);
+}
+
+
 /**
  * Print additional debug information associated with the batchbuffer
  * when DEBUG_BATCH is set.
@@ -105,27 +177,20 @@ static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer)
 void brw_debug_batch(struct intel_context *intel)
 {
    struct brw_context *brw = brw_context(&intel->ctx);
-   dri_bo *ss_buffer, *gs_buffer;
 
-   ss_buffer = brw->pool[BRW_SS_POOL].buffer;
-   gs_buffer = brw->pool[BRW_GS_POOL].buffer;
+   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+   dump_wm_surface_state(brw);
 
-   dri_bo_map(ss_buffer, GL_FALSE);
-   dri_bo_map(gs_buffer, GL_FALSE);
+   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog("VS prog", brw->vs.prog_bo);
 
-   state_struct_out("WM bind", ss_buffer, brw->wm.bind_ss_offset,
-		    4 * brw->wm.nr_surfaces);
-   dump_wm_surface_state(brw, ss_buffer);
+   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog("GS prog", brw->gs.prog_bo);
 
-   state_struct_out("VS", gs_buffer, brw->vs.state_gs_offset,
-		    sizeof(struct brw_vs_unit_state));
-   state_struct_out("SF", gs_buffer, brw->sf.state_gs_offset,
-		    sizeof(struct brw_sf_unit_state));
-   state_struct_out("SF viewport", gs_buffer, brw->sf.state_gs_offset,
-		    sizeof(struct brw_sf_unit_state));
-   state_struct_out("WM", gs_buffer, brw->wm.state_gs_offset,
-		    sizeof(struct brw_wm_unit_state));
+   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   dump_sf_viewport_state(brw);
+   brw_debug_prog("SF prog", brw->sf.prog_bo);
 
-   dri_bo_unmap(gs_buffer);
-   dri_bo_unmap(ss_buffer);
+   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog("WM prog", brw->wm.prog_bo);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c
deleted file mode 100644
index 148bb516a6..0000000000
--- a/src/mesa/drivers/dri/i965/brw_state_pool.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-       
-
-#include "brw_state.h"
-#include "imports.h"
-
-#include "intel_ioctl.h"
-#include "dri_bufmgr.h"
-
-GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
-			  GLuint size,
-			  GLuint align,
-			  GLuint *offset_return)
-{
-   GLuint fixup = ALIGN(pool->offset, align) - pool->offset;
-
-   size = ALIGN(size, 4);
-
-   if (pool->offset + fixup + size >= pool->size) {
-      _mesa_printf("%s failed\n", __FUNCTION__);
-      assert(0);
-      exit(0);
-   }
-
-   pool->offset += fixup;
-   *offset_return = pool->offset;
-   pool->offset += size; 
-  
-   return GL_TRUE;
-}
-
-static
-void brw_invalidate_pool( struct intel_context *intel,
-			  struct brw_mem_pool *pool )
-{
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__);
-
-   pool->offset = 0;
-
-   brw_clear_all_caches(pool->brw);
-}
-
-static void
-brw_invalidate_pool_cb(dri_bo *bo, void *ptr)
-{
-   struct brw_mem_pool *pool = ptr;
-   struct brw_context *brw = pool->brw;
-
-   brw_invalidate_pool(&brw->intel, pool);
-}
-
-static void brw_init_pool( struct brw_context *brw,
-			   GLuint pool_id,
-			   GLuint size )
-{
-   struct brw_mem_pool *pool = &brw->pool[pool_id];
-
-   pool->size = size;   
-   pool->brw = brw;
-
-   pool->buffer = dri_bo_alloc(brw->intel.bufmgr,
-			       (pool_id == BRW_GS_POOL) ? "GS pool" : "SS pool",
-			       size, 4096, DRM_BO_FLAG_MEM_TT);
-
-   /* Disable the backing store for the state cache.  It's not worth the
-    * cost of keeping a backing store copy, since we can just regenerate
-    * the contents at approximately the same cost as the memcpy, and only
-    * if the contents are lost.
-    */
-   if (!brw->intel.ttm) {
-      dri_bo_fake_disable_backing_store(pool->buffer, brw_invalidate_pool_cb,
-					pool);
-   }
-}
-
-static void brw_destroy_pool( struct brw_context *brw,
-			      GLuint pool_id )
-{
-   struct brw_mem_pool *pool = &brw->pool[pool_id];
-
-   dri_bo_unreference(pool->buffer);
-}
-
-
-void brw_pool_check_wrap( struct brw_context *brw,
-			  struct brw_mem_pool *pool )
-{
-   if (pool->offset > (pool->size * 3) / 4) {
-      brw->state.dirty.brw |= BRW_NEW_CONTEXT;
-   }
-
-}
-
-void brw_init_pools( struct brw_context *brw )
-{
-   brw_init_pool(brw, BRW_GS_POOL, 0x80000);
-   brw_init_pool(brw, BRW_SS_POOL, 0x80000);
-}
-
-void brw_destroy_pools( struct brw_context *brw )
-{
-   brw_destroy_pool(brw, BRW_GS_POOL);
-   brw_destroy_pool(brw, BRW_SS_POOL);
-}
-
-
-void brw_invalidate_pools( struct brw_context *brw )
-{
-   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]);
-   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]);
-}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 98637a6097..94165da816 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -112,8 +112,7 @@ void brw_init_state( struct brw_context *brw )
 {
    GLuint i;
 
-   brw_init_pools(brw);
-   brw_init_caches(brw);
+   brw_init_cache(brw);
 
    brw->state.atoms = _mesa_malloc(sizeof(atoms));
    brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
@@ -138,9 +137,8 @@ void brw_destroy_state( struct brw_context *brw )
       brw->state.atoms = NULL;
    }
 
-   brw_destroy_caches(brw);
+   brw_destroy_cache(brw);
    brw_destroy_batch_cache(brw);
-   brw_destroy_pools(brw);   
 }
 
 /***********************************************************************
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 10fee944e8..dd11640e6b 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -239,39 +239,39 @@ struct brw_pipelined_state_pointers
    
    struct {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } vs;
    
    struct
    {
       GLuint enable:1;
       GLuint pad:4;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } gs;
    
    struct
    {
       GLuint enable:1;
       GLuint pad:4;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } clp;
    
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } sf;
 
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } wm;
    
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; /* KW: check me! */
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
    } cc;
 };
 
@@ -473,7 +473,7 @@ struct thread0
    GLuint pad0:1;
    GLuint grf_reg_count:3; 
    GLuint pad1:2;
-   GLuint kernel_start_pointer:26; 
+   GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
 };
 
 struct thread1
@@ -637,7 +637,7 @@ struct brw_cc_unit_state
    struct
    {
       GLuint pad0:5; 
-      GLuint cc_viewport_state_offset:27; 
+      GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
    } cc4;
    
    struct
@@ -699,7 +699,7 @@ struct brw_sf_unit_state
       GLuint front_winding:1; 
       GLuint viewport_transform:1; 
       GLuint pad0:3;
-      GLuint sf_viewport_state_offset:27; 
+      GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
    } sf5;
    
    struct
@@ -924,6 +924,7 @@ struct brw_sf_viewport
       GLfloat m32;  
    } viewport;
 
+   /* scissor coordinates are inclusive */
    struct {
       GLshort xmin;
       GLshort ymin;
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 2b42918e15..e437c411dd 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -40,7 +40,7 @@
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
-GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt )
+GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
 {
    /* XXX: these vary depending on image format: 
     */
@@ -65,7 +65,7 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt )
           mt->pitch = ALIGN(width, align_w);
           pack_y_pitch = (height + 3) / 4;
       } else {
-          mt->pitch = ALIGN(mt->width0 * mt->cpp, 4) / mt->cpp;
+          mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
           pack_y_pitch = ALIGN(mt->height0, align_h);
       }
 
@@ -123,7 +123,7 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt )
    }
 
    default:
-      i945_miptree_layout_2d(mt);
+      i945_miptree_layout_2d(intel, mt);
       break;
    }
    DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index e173f6fce3..038d7f7911 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -73,15 +73,13 @@ static void do_vs_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c.func, &program_size);
 
-   /*
-    */
-   brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->vs.prog_data);
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->vs.prog_data );
 }
 
 
@@ -110,13 +108,13 @@ static void brw_upload_vs_prog( struct brw_context *brw )
 
    /* Make an early check for the key.
     */
-   if (brw_search_cache(&brw->cache[BRW_VS_PROG], 
-			&key, sizeof(key),
-			&brw->vs.prog_data,
-			&brw->vs.prog_gs_offset))
-       return;
-
-   do_vs_prog(brw, vp, &key);
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->vs.prog_data);
+   if (brw->vs.prog_bo == NULL)
+      do_vs_prog(brw, vp, &key);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index f561979138..2d788d35ec 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -43,7 +43,7 @@ static void upload_vs_unit( struct brw_context *brw )
    memset(&vs, 0, sizeof(vs));
 
    /* CACHE_NEW_VS_PROG */
-   vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
+   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
    vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
    vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
    vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
@@ -87,9 +87,22 @@ static void upload_vs_unit( struct brw_context *brw )
     */
    vs.vs6.vs_enable = 1;
 
-   brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs );
+   brw->vs.thread0_delta = vs.thread0.grf_reg_count << 1;
+
+   dri_bo_unreference(brw->vs.state_bo);
+   brw->vs.state_bo = brw_cache_data( &brw->cache, BRW_VS_UNIT , &vs,
+				      &brw->vs.prog_bo, 1 );
 }
 
+static void emit_reloc_vs_unit(struct brw_context *brw)
+{
+   /* Emit VS program relocation */
+   dri_emit_reloc(brw->vs.state_bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  brw->vs.thread0_delta,
+		  offsetof(struct brw_vs_unit_state, thread0),
+		  brw->vs.prog_bo);
+}
 
 const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
@@ -98,5 +111,6 @@ const struct brw_tracked_state brw_vs_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_VS_PROG
    },
-   .update = upload_vs_unit
+   .update = upload_vs_unit,
+   .emit_reloc = emit_reloc_vs_unit,
 };
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index b9dc9ad180..2bc8849867 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -75,8 +75,8 @@ static void brw_set_draw_region( struct intel_context *intel,
 {
    struct brw_context *brw = brw_context(&intel->ctx);
 
-   intel_region_release(intel, &brw->state.draw_region);
-   intel_region_release(intel, &brw->state.depth_region);
+   intel_region_release(&brw->state.draw_region);
+   intel_region_release(&brw->state.depth_region);
    intel_region_reference(&brw->state.draw_region, draw_region);
    intel_region_reference(&brw->state.depth_region, depth_region);
 }
@@ -112,10 +112,9 @@ static void brw_note_fence( struct intel_context *intel,
  
 static void brw_note_unlock( struct intel_context *intel )
 {
-  struct brw_context *brw = brw_context(&intel->ctx);
+   struct brw_context *brw = brw_context(&intel->ctx);
 
-   brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]);
-   brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]);
+   brw_state_cache_check_size(brw);
 
    brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 2d6249e3b5..2775fad37b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -201,15 +201,13 @@ static void do_wm_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c->func, &program_size);
 
-   /*
-    */
-   brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
-					      &c->key,
-					      sizeof(c->key),
-					      program,
-					      program_size,
-					      &c->prog_data,
-					      &brw->wm.prog_data );
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+				       &c->key, sizeof(c->key),
+				       NULL, 0,
+				       program, program_size,
+				       &c->prog_data,
+				       &brw->wm.prog_data );
 }
 
 
@@ -249,7 +247,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
       lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 
       if (brw->attribs.Stencil->WriteMask[0] ||
-	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+	  (brw->attribs.Stencil->_TestTwoSide &&
+	   brw->attribs.Stencil->WriteMask[1]))
 	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
    }
 
@@ -331,13 +330,13 @@ static void brw_upload_wm_prog( struct brw_context *brw )
 
    /* Make an early check for the key.
     */
-   if (brw_search_cache(&brw->cache[BRW_WM_PROG], 
-			&key, sizeof(key),
-			&brw->wm.prog_data,
-			&brw->wm.prog_gs_offset))
-      return;
-
-   do_wm_prog(brw, fp, &key);
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->wm.prog_data);
+   if (brw->wm.prog_bo == NULL)
+      do_wm_prog(brw, fp, &key);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 440b5357d5..d4eb2330b7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -269,6 +269,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
 		       GLuint lookup,
 		       struct brw_wm_prog_key *key );
 
-GLboolean brw_wm_is_glsl(struct gl_fragment_program *fp);
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_wm_compile *c);
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index e738086fef..1ca5c67a0b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -5,7 +5,7 @@
 #include "brw_wm.h"
 
 /* Only guess, need a flag in gl_fragment_program later */
-GLboolean brw_wm_is_glsl(struct gl_fragment_program *fp)
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
 {
     int i;
     for (i = 0; i < fp->Base.NumInstructions; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 3c0952acf0..d410b1e804 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -79,14 +79,15 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits)
 }
 
 
-static GLuint upload_default_color( struct brw_context *brw,
-				    const GLfloat *color )
+static dri_bo *upload_default_color( struct brw_context *brw,
+				     const GLfloat *color )
 {
    struct brw_sampler_default_color sdc;
 
    COPY_4V(sdc.color, color); 
    
-   return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc );
+   return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+			  NULL, 0 );
 }
 
 
@@ -94,7 +95,7 @@ static GLuint upload_default_color( struct brw_context *brw,
  */
 static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
 				      struct gl_texture_object *texObj,
-				      GLuint sdc_gs_offset,
+				      dri_bo *sdc_bo,
 				      struct brw_sampler_state *sampler)
 {   
    _mesa_memset(sampler, 0, sizeof(*sampler));
@@ -195,7 +196,7 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
    sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(texObj->MaxLod, 0), 13), 6);
    sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(texObj->MinLod, 0), 13), 6);
    
-   sampler->ss2.default_color_pointer = sdc_gs_offset >> 5;
+   sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
 }
 
 
@@ -208,6 +209,7 @@ static void upload_wm_samplers( struct brw_context *brw )
 {
    GLuint unit;
    GLuint sampler_count = 0;
+   dri_bo *reloc_bufs[BRW_MAX_TEX_UNIT];
 
    /* _NEW_TEXTURE */
    for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
@@ -215,15 +217,20 @@ static void upload_wm_samplers( struct brw_context *brw )
 	 struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
 	 struct gl_texture_object *texObj = texUnit->_Current;
 
-	 GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor);
+	 dri_bo_unreference(brw->wm.sdc_bo[unit]);
+	 brw->wm.sdc_bo[unit] = upload_default_color(brw, texObj->BorderColor);
 
 	 brw_update_sampler_state(texUnit,
-				  texObj, 
-				  sdc_gs_offset,
+				  texObj,
+				  brw->wm.sdc_bo[unit],
 				  &brw->wm.sampler[unit]);
 
 	 sampler_count = unit + 1;
+      } else {
+	 dri_bo_unreference(brw->wm.sdc_bo[unit]);
+	 brw->wm.sdc_bo[unit] = NULL;
       }
+      reloc_bufs[unit] = brw->wm.sdc_bo[unit];
    }
    
    if (brw->wm.sampler_count != sampler_count) {
@@ -231,15 +238,39 @@ static void upload_wm_samplers( struct brw_context *brw )
       brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
    }
 
-   brw->wm.sampler_gs_offset = 0;
-
-   if (brw->wm.sampler_count) 
-      brw->wm.sampler_gs_offset = 
-	 brw_cache_data_sz(&brw->cache[BRW_SAMPLER],
+   dri_bo_unreference(brw->wm.sampler_bo);
+   if (brw->wm.sampler_count) {
+      brw->wm.sampler_bo =
+	 brw_cache_data_sz(&brw->cache, BRW_SAMPLER,
 			   brw->wm.sampler,
-			   sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
+			   sizeof(struct brw_sampler_state) *
+			   brw->wm.sampler_count,
+			   reloc_bufs, BRW_MAX_TEX_UNIT);
+   } else {
+      brw->wm.sampler_bo = NULL;
+   }
 }
 
+static void emit_reloc_wm_samplers(struct brw_context *brw)
+{
+   GLuint unit;
+
+   if (brw->wm.sampler_count == 0)
+      return;
+
+   /* Emit SDC relocations */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      if (!brw->attribs.Texture->Unit[unit]._ReallyEnabled)
+	 continue;
+
+      dri_emit_reloc(brw->wm.sampler_bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     0,
+		     unit * sizeof(struct brw_sampler_state) +
+		     offsetof(struct brw_sampler_state, ss2),
+		     brw->wm.sdc_bo[unit]);
+   }
+}
 
 const struct brw_tracked_state brw_wm_samplers = {
    .dirty = {
@@ -247,7 +278,8 @@ const struct brw_tracked_state brw_wm_samplers = {
       .brw = 0,
       .cache = 0
    },
-   .update = upload_wm_samplers
+   .update = upload_wm_samplers,
+   .emit_reloc = emit_reloc_wm_samplers,
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 76865217bf..8a7236e62f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -47,6 +47,7 @@ static void upload_wm_unit(struct brw_context *brw )
    struct brw_wm_unit_state wm;
    GLuint max_threads;
    GLuint per_thread;
+   dri_bo *reloc_bufs[3];
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
       max_threads = 0; 
@@ -58,7 +59,7 @@ static void upload_wm_unit(struct brw_context *brw )
 
    /* CACHE_NEW_WM_PROG */
    wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
    wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
    wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
    wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
@@ -87,19 +88,21 @@ static void upload_wm_unit(struct brw_context *brw )
 					       4096, DRM_BO_FLAG_MEM_TT);
       }
    }
-   /* XXX: Scratch buffers are not implemented correectly.
-    *
-    * The scratch offset to be programmed into wm is relative to the general
-    * state base address.  However, using dri_bo_alloc/dri_bo_emit_reloc (or
-    * the previous bmGenBuffers scheme), we get an offset relative to the
-    * start of framebuffer.  Even before then, it was broken in other ways,
-    * so just fail for now if we hit that path.
-    */
-   assert(brw->wm.prog_data->total_scratch == 0);
 
    /* CACHE_NEW_SURFACE */
    wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
 
+   /* CACHE_NEW_WM_PROG */
+   if (per_thread != 0) {
+   /* reloc */
+      wm.thread2.scratch_space_base_pointer =
+	 brw->wm.scratch_buffer->offset >> 10;
+      wm.thread2.per_thread_scratch_space = per_thread / 1024 - 1;
+   } else {
+      wm.thread2.scratch_space_base_pointer = 0;
+      wm.thread2.per_thread_scratch_space = 0;
+   }
+
    /* BRW_NEW_CURBE_OFFSETS */
    wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
 
@@ -109,7 +112,12 @@ static void upload_wm_unit(struct brw_context *brw )
 
    /* CACHE_NEW_SAMPLER */
    wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
-   wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;
+   if (brw->wm.sampler_bo != NULL) {
+      /* reloc */
+      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+   } else {
+      wm.wm4.sampler_state_pointer = 0;
+   }
 
    /* BRW_NEW_FRAGMENT_PROGRAM */
    {
@@ -166,19 +174,44 @@ static void upload_wm_unit(struct brw_context *brw )
    if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm)
       wm.wm4.stats_enable = 1;
 
-   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+   reloc_bufs[0] = brw->wm.prog_bo;
+   reloc_bufs[1] = brw->wm.scratch_buffer;
+   reloc_bufs[2] = brw->wm.sampler_bo;
 
-   if (brw->wm.prog_data->total_scratch) {
-      /*
-      dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer,
+   brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1;
+   brw->wm.thread2_delta = wm.thread2.per_thread_scratch_space;
+   brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2);
+
+   dri_bo_unreference(brw->wm.state_bo);
+   brw->wm.state_bo = brw_cache_data( &brw->cache, BRW_WM_UNIT, &wm,
+				      reloc_bufs, 3 );
+}
+
+static void emit_reloc_wm_unit(struct brw_context *brw)
+{
+   /* Emit WM program relocation */
+   dri_emit_reloc(brw->wm.state_bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  brw->wm.thread0_delta,
+		  offsetof(struct brw_wm_unit_state, thread0),
+		  brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (brw->wm.scratch_buffer != NULL) {
+      dri_emit_reloc(brw->wm.state_bo,
 		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
-		     (per_thread / 1024) - 1,
-		     brw->wm.state_gs_offset +
-		     ((char *)&wm.thread2 - (char *)&wm),
+		     brw->wm.thread2_delta,
+		     offsetof(struct brw_wm_unit_state, thread2),
 		     brw->wm.scratch_buffer);
-      */
-   } else {
-      wm.thread2.scratch_space_base_pointer = 0;
+   }
+
+   /* Emit sampler state relocation */
+   if (brw->wm.sampler_bo != NULL) {
+      dri_emit_reloc(brw->wm.state_bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     brw->wm.wm4_delta,
+		     offsetof(struct brw_wm_unit_state, wm4),
+		     brw->wm.sampler_bo);
    }
 }
 
@@ -197,6 +230,7 @@ const struct brw_tracked_state brw_wm_unit = {
 		CACHE_NEW_WM_PROG | 
 		CACHE_NEW_SAMPLER)
    },
-   .update = upload_wm_unit
+   .update = upload_wm_unit,
+   .emit_reloc = emit_reloc_wm_unit,
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 2ade4eeae8..efec0e7517 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -139,40 +139,46 @@ static GLuint translate_tex_format( GLuint mesa_format )
    }
 }
 
-static
-void brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+struct brw_wm_surface_key {
+   GLenum target;
+   dri_bo *bo;
+   GLint format;
+   GLint first_level, last_level;
+   GLint width, height, depth;
+   GLint pitch, cpp;
+   GLboolean tiled;
+};
+
+static dri_bo *
+brw_create_texture_surface( struct brw_context *brw,
+			    struct brw_wm_surface_key *key )
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
    struct brw_surface_state surf;
 
    memset(&surf, 0, sizeof(surf));
 
    surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   surf.ss0.surface_type = translate_tex_target(tObj->Target);
-   surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);
+   surf.ss0.surface_type = translate_tex_target(key->target);
+   surf.ss0.surface_format = translate_tex_format(key->format);
 
    /* This is ok for all textures with channel width 8bit or less:
     */
 /*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
 
-   /* Updated in emit_reloc */
-   surf.ss1.base_addr = intelObj->mt->region->buffer->offset;
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
 
-   surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
-   surf.ss2.width = firstImage->Width - 1;
-   surf.ss2.height = firstImage->Height - 1;
+   surf.ss2.mip_count = key->last_level - key->first_level;
+   surf.ss2.width = key->width - 1;
+   surf.ss2.height = key->height - 1;
 
    surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-   surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
-   surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
-   surf.ss3.depth = firstImage->Depth - 1;
+   surf.ss3.tiled_surface = key->tiled;
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
+   surf.ss3.depth = key->depth - 1;
 
    surf.ss4.min_lod = 0;
  
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+   if (key->target == GL_TEXTURE_CUBE_MAP) {
       surf.ss0.cube_pos_x = 1;
       surf.ss0.cube_pos_y = 1;
       surf.ss0.cube_pos_z = 1;
@@ -181,14 +187,82 @@ void brw_update_texture_surface( GLcontext *ctx, GLuint unit )
       surf.ss0.cube_neg_z = 1;
    }
 
-   brw->wm.bind.surf_ss_offset[unit + 1] =
-      brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+   return brw_upload_cache( &brw->cache, BRW_SS_SURFACE,
+			    key, sizeof(*key),
+			    &key->bo, 1,
+			    &surf, sizeof(surf),
+			    NULL, NULL );
 }
 
-
+static void
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   struct brw_wm_surface_key key;
+
+   memset(&key, 0, sizeof(key));
+   key.target = tObj->Target;
+   key.format = firstImage->TexFormat->MesaFormat;
+   key.bo = intelObj->mt->region->buffer;
+   key.first_level = intelObj->firstLevel;
+   key.last_level = intelObj->lastLevel;
+   key.width = firstImage->Width;
+   key.height = firstImage->Height;
+   key.pitch = intelObj->mt->pitch;
+   key.cpp = intelObj->mt->cpp;
+   key.depth = firstImage->Depth;
+   key.tiled = intelObj->mt->region->tiled;
+
+   dri_bo_unreference(brw->wm.surf_bo[unit + 1]);
+   brw->wm.surf_bo[unit + 1] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+						&key, sizeof(key),
+						&key.bo, 1,
+						NULL);
+   if (brw->wm.surf_bo[unit + 1] == NULL)
+      brw->wm.surf_bo[unit + 1] = brw_create_texture_surface(brw, &key);
+}
 
 #define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) )
 
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static dri_bo *
+brw_wm_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->wm.surf_bo, brw->wm.nr_surfaces,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      GLuint data_size = brw->wm.nr_surfaces * 4;
+      uint32_t *data = malloc(data_size);
+      int i;
+
+      for (i = 0; i < brw->wm.nr_surfaces; i++)
+         if (brw->wm.surf_bo[i])
+            data[i] = brw->wm.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+
+      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->wm.surf_bo, brw->wm.nr_surfaces,
+				  data, data_size,
+				  NULL, NULL);
+
+      free(data);
+   }
+
+   return bind_bo;
+}
 
 static void upload_wm_surfaces(struct brw_context *brw )
 {
@@ -199,15 +273,31 @@ static void upload_wm_surfaces(struct brw_context *brw )
    {
       struct brw_surface_state surf;
       struct intel_region *region = brw->state.draw_region;
+      dri_bo *region_bo;
 
       memset(&surf, 0, sizeof(surf));
 
-      if (region->cpp == 4)
-	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-      else 
-	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+      if (region != NULL) {
+	 if (region->cpp == 4)
+	    surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 else
+	    surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+	 surf.ss0.surface_type = BRW_SURFACE_2D;
 
-      surf.ss0.surface_type = BRW_SURFACE_2D;
+	 surf.ss1.base_addr = region->buffer->offset; /* reloc */
+
+	 surf.ss2.width = region->pitch - 1; /* XXX: not really! */
+	 surf.ss2.height = region->height - 1;
+	 surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+	 surf.ss3.tiled_surface = region->tiled;
+	 surf.ss3.pitch = (region->pitch * region->cpp) - 1;
+	 region_bo = region->buffer;
+      } else {
+	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 surf.ss0.surface_type = BRW_SURFACE_NULL;
+	 region_bo = NULL;
+      }
 
       /* _NEW_COLOR */
       surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
@@ -219,16 +309,10 @@ static void upload_wm_surfaces(struct brw_context *brw )
       surf.ss0.writedisable_blue =  !brw->attribs.Color->ColorMask[2];
       surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];
 
-      /* Updated in emit_reloc */
-      surf.ss1.base_addr = region->buffer->offset;
-
-      surf.ss2.width = region->pitch - 1; /* XXX: not really! */
-      surf.ss2.height = region->height - 1;
-      surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-      surf.ss3.tiled_surface = region->tiled;
-      surf.ss3.pitch = (region->pitch * region->cpp) - 1;
-
-      brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+      /* Key size will never match key size for textures, so we're safe. */
+      dri_bo_unreference(brw->wm.surf_bo[0]);
+      brw->wm.surf_bo[0] = brw_cache_data( &brw->cache, BRW_SS_SURFACE, &surf,
+					   &region_bo, 1 );
 
       brw->wm.nr_surfaces = 1;
    }
@@ -240,7 +324,7 @@ static void upload_wm_surfaces(struct brw_context *brw )
       /* _NEW_TEXTURE, BRW_NEW_TEXDATA 
        */
       if (texUnit->_ReallyEnabled &&
-	  intel_finalize_mipmap_tree(intel,texUnit->_Current))
+	  intel_finalize_mipmap_tree(intel, i))
       {
 	 brw_update_texture_surface(ctx, i);
 	 brw->wm.nr_surfaces = i+2;
@@ -248,45 +332,60 @@ static void upload_wm_surfaces(struct brw_context *brw )
       else if( texUnit->_ReallyEnabled &&
 	       texUnit->_Current == intel->frame_buffer_texobj )
       {
-	 brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0];
+	 dri_bo_unreference(brw->wm.surf_bo[i+1]);
+	 brw->wm.surf_bo[i+1] = brw->wm.surf_bo[0];
+	 dri_bo_reference(brw->wm.surf_bo[i+1]);
 	 brw->wm.nr_surfaces = i+2;
-      }    
-      else {
-	 brw->wm.bind.surf_ss_offset[i+1] = 0;
+      } else {
+	 dri_bo_unreference(brw->wm.surf_bo[i+1]);
+	 brw->wm.surf_bo[i+1] = NULL;
       }
    }
 
-   brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND],
-					    &brw->wm.bind );
+   dri_bo_unreference(brw->wm.bind_bo);
+   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
 }
 
 static void emit_reloc_wm_surfaces(struct brw_context *brw)
 {
-   int unit;
-
-   /* Emit framebuffer relocation */
-   dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
-		  0,
-		  brw->wm.bind.surf_ss_offset[0] +
-		  offsetof(struct brw_surface_state, ss1),
-		  brw->state.draw_region->buffer);
+   int unit, i;
+
+   /* Emit SS framebuffer relocation */
+   if (brw->state.draw_region != NULL) {
+      dri_emit_reloc(brw->wm.surf_bo[0],
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+		     0,
+		     offsetof(struct brw_surface_state, ss1),
+		     brw->state.draw_region->buffer);
+   }
 
-   /* Emit relocations for texture buffers */
+   /* Emit SS relocations for texture buffers */
    for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
       struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
       struct gl_texture_object *tObj = texUnit->_Current;
       struct intel_texture_object *intelObj = intel_texture_object(tObj);
 
       if (texUnit->_ReallyEnabled && intelObj->mt != NULL) {
-	 dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+	 dri_emit_reloc(brw->wm.surf_bo[unit + 1],
 			DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
 			0,
-			brw->wm.bind.surf_ss_offset[unit + 1] +
 			offsetof(struct brw_surface_state, ss1),
 			intelObj->mt->region->buffer);
       }
    }
+
+   /* Emit binding table relocations to surface state */
+   for (i = 0; i < BRW_WM_MAX_SURF; i++) {
+      if (brw->wm.surf_bo[i] != NULL) {
+	 dri_emit_reloc(brw->wm.bind_bo,
+			DRM_BO_FLAG_MEM_TT |
+			DRM_BO_FLAG_READ |
+			DRM_BO_FLAG_WRITE,
+			0,
+			i * 4,
+			brw->wm.surf_bo[i]);
+      }
+   }
 }
 
 const struct brw_tracked_state brw_wm_surfaces = {
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index dd9c871902..dd6c8d17c2 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -1,614 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <errno.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
-#include "vblank.h"
-
-#include "intel_reg.h"
-#include "intel_batchbuffer.h"
-#include "intel_context.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_structs.h"
-
-#include "dri_bufmgr.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLIT
-
-/*
- * Copy the back buffer to the front buffer. 
- */
-void intelCopyBuffer( __DRIdrawablePrivate *dPriv,
-		      const drm_clip_rect_t *rect ) 
-{
-   struct intel_context *intel;
-   GLboolean   missed_target;
-   int64_t ust;
-
-   DBG("%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-   intelFlush( &intel->ctx );
-
-   if (intel->last_swap_fence) {
-      dri_fence_wait(intel->last_swap_fence);
-      dri_fence_unreference(intel->last_swap_fence);
-      intel->last_swap_fence = NULL;
-   }
-   intel->last_swap_fence = intel->first_swap_fence;
-   intel->first_swap_fence = NULL;
-
-   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
-    * should work regardless.
-    */
-   LOCK_HARDWARE( intel );
-
-   if (!rect)
-   {
-       UNLOCK_HARDWARE( intel );
-       driWaitForVBlank( dPriv, &missed_target );
-       LOCK_HARDWARE( intel );
-   }
-
-   {
-      intelScreenPrivate *intelScreen = intel->intelScreen;
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      int nbox = dPriv->numClipRects;
-      drm_clip_rect_t *pbox = dPriv->pClipRects;
-      int cpp = intelScreen->cpp;
-      struct intel_region *src, *dst;
-      int BR13, CMD;
-      int i;
-      int src_pitch, dst_pitch;
-
-      if (intel->sarea->pf_current_page == 0) {
-	 dst = intel->front_region;
-	 src = intel->back_region;
-      }
-      else {
-	 assert(0);
-	 src = intel->front_region;
-	 dst = intel->back_region;
-      }
-
-      src_pitch = src->pitch * src->cpp;
-      dst_pitch = dst->pitch * dst->cpp;
-
-      if (cpp == 2) {
-	 BR13 = (0xCC << 16) | (1<<24);
-	 CMD = XY_SRC_COPY_BLT_CMD;
-      } 
-      else {
-	 BR13 = (0xCC << 16) | (1<<24) | (1<<25);
-	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      }
-
-      if (src->tiled) {
-	 CMD |= XY_SRC_TILED;
-	 src_pitch /= 4;
-      }
-      
-      if (dst->tiled) {
-	 CMD |= XY_DST_TILED;
- 	 dst_pitch /= 4;
-      }
-  
-      for (i = 0 ; i < nbox; i++, pbox++) 
-      {
-	 drm_clip_rect_t tmp = *pbox;
-
-	 if (rect) {
-	    if (!intel_intersect_cliprects(&tmp, &tmp, rect))
-	       continue;
-	 }
-
-
-	 if (tmp.x1 > tmp.x2 ||
-	     tmp.y1 > tmp.y2 ||
-	     tmp.x2 > intelScreen->width ||
-	     tmp.y2 > intelScreen->height)
-	    continue;
- 
-	 BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-	 OUT_BATCH( CMD );
-	 OUT_BATCH( dst_pitch | BR13 );
-	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
-	 OUT_BATCH( (tmp.y2 << 16) | tmp.x2 );
-	 OUT_RELOC( dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0 );
-	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
-	 OUT_BATCH( src_pitch );
-	 OUT_RELOC( src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0 );
-	 ADVANCE_BATCH();
-      }
-   }
-
-   if (intel->first_swap_fence)
-      dri_fence_unreference(intel->first_swap_fence);
-   intel_batchbuffer_flush(intel->batch);
-   intel->first_swap_fence = intel->batch->last_fence;
-   if (intel->first_swap_fence != NULL)
-      dri_fence_reference(intel->first_swap_fence);
-   UNLOCK_HARDWARE( intel );
-
-   if (!rect)
-   {
-       intel->swap_count++;
-       (*dri_interface->getUST)(&ust);
-       if (missed_target) {
-	   intel->swap_missed_count++;
-	   intel->swap_missed_ust = ust -  intel->swap_ust;
-       }
-   
-       intel->swap_ust = ust;
-   }
-
-}
-
-
-
-
-void intelEmitFillBlit( struct intel_context *intel,
-			GLuint cpp,
-			GLshort dst_pitch,
-			dri_bo *dst_buffer,
-			GLuint dst_offset,
-			GLboolean dst_tiled,
-			GLshort x, GLshort y, 
-			GLshort w, GLshort h,
-			GLuint color )
-{
-   GLuint BR13, CMD;
-   BATCH_LOCALS;
-
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = (0xF0 << 16) | (1<<24);
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
-      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      break;
-   default:
-      return;
-   }
-
-   if (dst_tiled) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-
-   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-   OUT_BATCH( CMD );
-   OUT_BATCH( dst_pitch | BR13 );
-   OUT_BATCH( (y << 16) | x );
-   OUT_BATCH( ((y+h) << 16) | (x+w) );
-   OUT_RELOC( dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset );
-   OUT_BATCH( color );
-   ADVANCE_BATCH();
-}
-
-static GLuint translate_raster_op(GLenum logicop)
-{
-   switch(logicop) {
-   case GL_CLEAR: return 0x00;
-   case GL_AND: return 0x88;
-   case GL_AND_REVERSE: return 0x44;
-   case GL_COPY: return 0xCC;
-   case GL_AND_INVERTED: return 0x22;
-   case GL_NOOP: return 0xAA;
-   case GL_XOR: return 0x66;
-   case GL_OR: return 0xEE;
-   case GL_NOR: return 0x11;
-   case GL_EQUIV: return 0x99;
-   case GL_INVERT: return 0x55;
-   case GL_OR_REVERSE: return 0xDD;
-   case GL_COPY_INVERTED: return 0x33;
-   case GL_OR_INVERTED: return 0xBB;
-   case GL_NAND: return 0x77;
-   case GL_SET: return 0xFF;
-   default: return 0;
-   }
-}
-
-
-/* Copy BitBlt
- */
-void intelEmitCopyBlit( struct intel_context *intel,
-			GLuint cpp,
-			GLshort src_pitch,
-			dri_bo *src_buffer,
-			GLuint  src_offset,
-			GLboolean src_tiled,
-			GLshort dst_pitch,
-			dri_bo *dst_buffer,
-			GLuint  dst_offset,
-			GLboolean dst_tiled,
-			GLshort src_x, GLshort src_y,
-			GLshort dst_x, GLshort dst_y,
-			GLshort w, GLshort h,
-			GLenum logic_op )
-{
-   GLuint CMD, BR13;
-   int dst_y2 = dst_y + h;
-   int dst_x2 = dst_x + w;
-   BATCH_LOCALS;
-
-
-   DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n",
-       __FUNCTION__,
-       src_buffer, src_pitch, src_x, src_y,
-       dst_buffer, dst_pitch, dst_x, dst_y,
-       w,h,logic_op);
-
-   assert( logic_op - GL_CLEAR >= 0 );
-   assert( logic_op - GL_CLEAR < 0x10 );
-      
-   src_pitch *= cpp;
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24);
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) |
-	  (1<<25);
-      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      break;
-   default:
-      return;
-   }
-
-   if (src_tiled) {
-      CMD |= XY_SRC_TILED;
-      src_pitch /= 4;
-   }
-   
-   if (dst_tiled) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-
-   if (dst_y2 < dst_y ||
-       dst_x2 < dst_x) {
-      return;
-   }
-
-   dst_pitch &= 0xffff;
-   src_pitch &= 0xffff;
-
-   /* Initial y values don't seem to work with negative pitches.  If
-    * we adjust the offsets manually (below), it seems to work fine.
-    *
-    * On the other hand, if we always adjust, the hardware doesn't
-    * know which blit directions to use, so overlapping copypixels get
-    * the wrong result.
-    */
-   if (dst_pitch > 0 && src_pitch > 0) {
-      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-      OUT_BATCH( CMD );
-      OUT_BATCH( dst_pitch | BR13 );
-      OUT_BATCH( (dst_y << 16) | dst_x );
-      OUT_BATCH( (dst_y2 << 16) | dst_x2 );
-      OUT_RELOC( dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		 dst_offset );
-      OUT_BATCH( (src_y << 16) | src_x );
-      OUT_BATCH( src_pitch );
-      OUT_RELOC( src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		 src_offset );
-      ADVANCE_BATCH();
-   }
-   else {
-      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-      OUT_BATCH( CMD );
-      OUT_BATCH( (dst_pitch & 0xffff) | BR13 );
-      OUT_BATCH( (0 << 16) | dst_x );
-      OUT_BATCH( (h << 16) | dst_x2 );
-      OUT_RELOC( dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		 dst_offset + dst_y * dst_pitch );
-      OUT_BATCH( (src_pitch & 0xffff) );
-      OUT_RELOC( src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		 src_offset + src_y * src_pitch );
-      ADVANCE_BATCH();
-   }
-}
-
-
-
-void intelClearWithBlit(GLcontext *ctx, GLbitfield flags)
-{
-   struct intel_context *intel = intel_context( ctx );
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLuint clear_depth, clear_color;
-   GLint cx, cy, cw, ch;
-   GLint cpp = intelScreen->cpp;
-   GLboolean all;
-   GLint i;
-   struct intel_region *front = intel->front_region;
-   struct intel_region *back = intel->back_region;
-   struct intel_region *depth = intel->depth_region;
-   GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD;
-   GLuint front_pitch;
-   GLuint back_pitch;
-   GLuint depth_pitch;
-   BATCH_LOCALS;
-
-   
-   clear_color = intel->ClearColor;
-   clear_depth = 0;
-
-   if (flags & BUFFER_BIT_DEPTH) {
-      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
-   }
-
-   if (flags & BUFFER_BIT_STENCIL) {
-      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
-   }
-
-   switch(cpp) {
-   case 2: 
-      BR13 = (0xF0 << 16) | (1<<24);
-      BACK_CMD  = FRONT_CMD = XY_COLOR_BLT_CMD;
-      DEPTH_CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
-      BACK_CMD = FRONT_CMD = XY_COLOR_BLT_CMD |
-	 XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      DEPTH_CMD = XY_COLOR_BLT_CMD;
-      if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_BLT_WRITE_RGB;
-      if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_BLT_WRITE_ALPHA;
-      break;
-   default:
-      return;
-   }
-
-
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-   {
-      /* get clear bounds after locking */
-      cx = ctx->DrawBuffer->_Xmin;
-      cy = ctx->DrawBuffer->_Ymin;
-      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
-      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-      all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height);
-
-      /* flip top to bottom */
-      cy = intel->driDrawable->h - cy - ch;
-      cx = cx + intel->drawX;
-      cy += intel->drawY;
-
-      /* adjust for page flipping */
-      if ( intel->sarea->pf_current_page == 0 ) {
-	 front = intel->front_region;
-	 back = intel->back_region;
-      } 
-      else {
-	 back = intel->front_region;
-	 front = intel->back_region;
-      }
-      
-      front_pitch = front->pitch * front->cpp;
-      back_pitch = back->pitch * back->cpp;
-      depth_pitch = depth->pitch * depth->cpp;
-      
-      if (front->tiled) {
-	 FRONT_CMD |= XY_DST_TILED;
-	 front_pitch /= 4;
-      }
-
-      if (back->tiled) {
-	 BACK_CMD |= XY_DST_TILED;
-	 back_pitch /= 4;
-      }
-
-      if (depth->tiled) {
-	 DEPTH_CMD |= XY_DST_TILED;
-	 depth_pitch /= 4;
-      }
-
-      for (i = 0 ; i < intel->numClipRects ; i++) 
-      { 	 
-	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
-	 drm_clip_rect_t b;
-
-	 if (!all) {
-	    GLint x = box->x1;
-	    GLint y = box->y1;
-	    GLint w = box->x2 - x;
-	    GLint h = box->y2 - y;
-
-	    if (x < cx) w -= cx - x, x = cx; 
-	    if (y < cy) h -= cy - y, y = cy;
-	    if (x + w > cx + cw) w = cx + cw - x;
-	    if (y + h > cy + ch) h = cy + ch - y;
-	    if (w <= 0) continue;
-	    if (h <= 0) continue;
-
-	    b.x1 = x;
-	    b.y1 = y;
-	    b.x2 = x + w;
-	    b.y2 = y + h;      
-	 } else {
-	    b = *box;
-	 }
-
-
-	 if (b.x1 > b.x2 ||
-	     b.y1 > b.y2 ||
-	     b.x2 > intelScreen->width ||
-	     b.y2 > intelScreen->height)
-	    continue;
-
-	 if ( flags & BUFFER_BIT_FRONT_LEFT ) {	    
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-	    OUT_BATCH( FRONT_CMD );
-	    OUT_BATCH( front_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_RELOC( front->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		       0 );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( flags & BUFFER_BIT_BACK_LEFT ) {
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); 
-	    OUT_BATCH( BACK_CMD );
-	    OUT_BATCH( back_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_RELOC( back->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		       0 );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-	    OUT_BATCH( DEPTH_CMD );
-	    OUT_BATCH( depth_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_RELOC( depth->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		       0 );
-	    OUT_BATCH( clear_depth );
-	    ADVANCE_BATCH();
-	 }      
-      }
-   }
-   intel_batchbuffer_flush( intel->batch );
-   UNLOCK_HARDWARE( intel );
-}
-
-
-void
-intelEmitImmediateColorExpandBlit(struct intel_context *intel,
-				  GLuint cpp,
-				  GLubyte *src_bits, GLuint src_size,
-				  GLuint fg_color,
-				  GLshort dst_pitch,
-				  dri_bo *dst_buffer,
-				  GLuint dst_offset,
-				  GLboolean dst_tiled,
-				  GLshort x, GLshort y, 
-				  GLshort w, GLshort h,
-				  GLenum logic_op)
-{
-   struct xy_text_immediate_blit text;
-   int dwords = ALIGN(src_size, 8) / 4;
-   uint32_t opcode, br13;
-
-   assert( logic_op - GL_CLEAR >= 0 );
-   assert( logic_op - GL_CLEAR < 0x10 );
-
-   if (w < 0 || h < 0) 
-      return;
-
-   dst_pitch *= cpp;
-
-   if (dst_tiled) 
-      dst_pitch /= 4;
-
-   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
-       __FUNCTION__,
-       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
-
-   memset(&text, 0, sizeof(text));
-   text.dw0.client = CLIENT_2D;
-   text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
-   text.dw0.pad0 = 0;
-   text.dw0.byte_packed = 1;	/* ?maybe? */
-   text.dw0.pad1 = 0;
-   text.dw0.dst_tiled = dst_tiled;
-   text.dw0.pad2 = 0;
-   text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords;
-   text.dw1.dest_y1 = y;	/* duplicates info in setup blit */
-   text.dw1.dest_x1 = x;
-   text.dw2.dest_y2 = y + h;
-   text.dw2.dest_x2 = x + w;
-
-   intel_batchbuffer_require_space( intel->batch,
-				    (8 * 4) +
-				    sizeof(text) + 
-				    dwords,
-				    INTEL_BATCH_NO_CLIPRECTS );
-
-   opcode = XY_SETUP_BLT_CMD;
-   if (cpp == 4)
-      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-   if (dst_tiled)
-      opcode |= XY_DST_TILED;
-
-   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
-   if (cpp == 2)
-      br13 |= BR13_565;
-   else
-      br13 |= BR13_8888;
-
-   BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-   OUT_BATCH(opcode);
-   OUT_BATCH(br13);
-   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
-   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
-   OUT_BATCH(0); /* bg */
-   OUT_BATCH(fg_color); /* fg */
-   OUT_BATCH(0); /* pattern base addr */
-   ADVANCE_BATCH();
-
-   intel_batchbuffer_data( intel->batch,
-			   &text,
-			   sizeof(text),
-			   INTEL_BATCH_NO_CLIPRECTS );
-
-   intel_batchbuffer_data( intel->batch,
-			   src_bits,
-			   dwords * 4,
-			   INTEL_BATCH_NO_CLIPRECTS );
-}
-
+../intel/intel_blit.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
deleted file mode 100644
index b7d556b1a9..0000000000
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_BLIT_H
-#define INTEL_BLIT_H
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-
-extern void intelCopyBuffer( __DRIdrawablePrivate *dpriv,
-			     const drm_clip_rect_t *rect );
-extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask);
-
-extern void intelEmitCopyBlit( struct intel_context *intel,
-			       GLuint cpp,
-			       GLshort src_pitch,
-			       dri_bo *src_buffer,
-			       GLuint  src_offset,
-			       GLboolean src_tiled,
-			       GLshort dst_pitch,
-			       dri_bo *dst_buffer,
-			       GLuint  dst_offset,
-			       GLboolean dst_tiled,
-			       GLshort srcx, GLshort srcy,
-			       GLshort dstx, GLshort dsty,
-			       GLshort w, GLshort h,
-			       GLenum logic_op );
-
-extern void intelEmitFillBlit( struct intel_context *intel,
-			       GLuint cpp,
-			       GLshort dst_pitch,
-			       dri_bo *dst_buffer,
-			       GLuint dst_offset,
-			       GLboolean dst_tiled,
-			       GLshort x, GLshort y, 
-			       GLshort w, GLshort h,
-			       GLuint color );
-
-void
-intelEmitImmediateColorExpandBlit(struct intel_context *intel,
-				  GLuint cpp,
-				  GLubyte *src_bits, GLuint src_size,
-				  GLuint fg_color,
-				  GLshort dst_pitch,
-				  dri_bo *dst_buffer,
-				  GLuint dst_offset,
-				  GLboolean dst_tiled,
-				  GLshort dst_x, GLshort dst_y, 
-				  GLshort w, GLshort h,
-				  GLenum logic_op );
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 56c1666ac6..e06dd3c8d3 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -1,222 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "imports.h"
-#include "mtypes.h"
-#include "bufferobj.h"
-
-#include "intel_context.h"
-#include "intel_buffer_objects.h"
-#include "dri_bufmgr.h"
-
-/** Allocates a new dri_bo to store the data for the buffer object. */
-static void
-intel_bufferobj_alloc_buffer(struct intel_context *intel,
-			     struct intel_buffer_object *intel_obj)
-{
-   intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
-				    intel_obj->Base.Size, 64,
-				    DRM_BO_FLAG_MEM_TT);
-}
-
-/**
- * There is some duplication between mesa's bufferobjects and our
- * bufmgr buffers.  Both have an integer handle and a hashtable to
- * lookup an opaque structure.  It would be nice if the handles and
- * internal structure where somehow shared.
- */
-static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, 
-						       GLuint name, 
-						       GLenum target )
-{
-   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
-
-   _mesa_initialize_buffer_object(&obj->Base, name, target);
-
-   return &obj->Base;
-}
-
-
-/**
- * Deallocate/free a vertex/pixel buffer object.
- * Called via glDeleteBuffersARB().
- */
-static void intel_bufferobj_free( GLcontext *ctx, 
-				  struct gl_buffer_object *obj )
-{ 
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-
-   if (intel_obj->buffer)
-      dri_bo_unreference(intel_obj->buffer);
-
-   _mesa_free(intel_obj);
-}
-
-
-
-/**
- * Allocate space for and store data in a buffer object.  Any data that was
- * previously stored in the buffer object is lost.  If data is NULL,
- * memory will be allocated, but no copy will occur.
- * Called via glBufferDataARB().
- */
-static void intel_bufferobj_data( GLcontext *ctx, 
-				  GLenum target, 
-				  GLsizeiptrARB size,
-				  const GLvoid *data, 
-				  GLenum usage,
-				  struct gl_buffer_object *obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   /* XXX: do something useful with 'usage' (eg. populate flags
-    * argument below)
-    */
-   assert(intel_obj);
-
-   obj->Size = size;
-   obj->Usage = usage;
-
-   /* While it would seem to make sense to always reallocate the buffer here,
-    * since it should allow us better concurrency between rendering and
-    * map-cpu write-unmap, doing so was a minor (~10%) performance loss
-    * for both classic and TTM mode with openarena.  That may change with
-    * improved buffer manager algorithms.
-    */
-   if (intel_obj->buffer != NULL && intel_obj->buffer->size != size) {
-      dri_bo_unreference(intel_obj->buffer);
-      intel_obj->buffer = NULL;
-   }
-   if (size != 0) {
-      if (intel_obj->buffer == NULL)
-	 intel_bufferobj_alloc_buffer(intel, intel_obj);
-
-      if (data != NULL)
-	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
-   }
-}
-
-
-/**
- * Replace data in a subrange of buffer object.  If the data range
- * specified by size + offset extends beyond the end of the buffer or
- * if data is NULL, no copy is performed.
- * Called via glBufferSubDataARB().
- */
-static void intel_bufferobj_subdata( GLcontext *ctx, 
-				     GLenum target, 
-				     GLintptrARB offset,
-				     GLsizeiptrARB size, 
-				     const GLvoid * data,
-				     struct gl_buffer_object * obj )
-{
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   dri_bo_subdata(intel_obj->buffer, offset, size, data);
-}
-
-
-/**
- * Called via glGetBufferSubDataARB().
- */
-static void intel_bufferobj_get_subdata( GLcontext *ctx, 
-					 GLenum target, 
-					 GLintptrARB offset,
-					 GLsizeiptrARB size, 
-					 GLvoid * data,
-					 struct gl_buffer_object * obj )
-{
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
-}
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *intel_bufferobj_map( GLcontext *ctx, 
-				  GLenum target, 
-				  GLenum access,
-				  struct gl_buffer_object *obj )
-{
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   /* XXX: Translate access to flags arg below:
-    */
-   assert(intel_obj);
-   assert(intel_obj->buffer);
-
-   dri_bo_map(intel_obj->buffer, GL_TRUE);
-   obj->Pointer = intel_obj->buffer->virtual;
-   return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferARB().
- */
-static GLboolean intel_bufferobj_unmap( GLcontext *ctx,
-					GLenum target,
-					struct gl_buffer_object *obj )
-{
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   assert(intel_obj->buffer);
-   assert(obj->Pointer);
-   dri_bo_unmap(intel_obj->buffer);
-   obj->Pointer = NULL;
-   return GL_TRUE;
-}
-
-dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
-{
-   assert(intel_obj->Base.Name);
-   assert(intel_obj->buffer);
-   return intel_obj->buffer;
-}  
-
-void intel_bufferobj_init( struct intel_context *intel )
-{
-   GLcontext *ctx = &intel->ctx;
-
-   ctx->Driver.NewBufferObject = intel_bufferobj_alloc;
-   ctx->Driver.DeleteBuffer = intel_bufferobj_free;
-   ctx->Driver.BufferData = intel_bufferobj_data;
-   ctx->Driver.BufferSubData = intel_bufferobj_subdata;
-   ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata;
-   ctx->Driver.MapBuffer = intel_bufferobj_map;
-   ctx->Driver.UnmapBuffer = intel_bufferobj_unmap;
-}
+../intel/intel_buffer_objects.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
deleted file mode 100644
index a80f448716..0000000000
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ /dev/null
@@ -1,70 +0,0 @@
- /**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_BUFFEROBJ_H
-#define INTEL_BUFFEROBJ_H
-
-#include "mtypes.h"
-
-struct intel_context;
-struct gl_buffer_object;
-
-
-/**
- * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
- */
-struct intel_buffer_object {
-   struct gl_buffer_object Base;
-   dri_bo *buffer;   /* the low-level buffer manager's buffer handle */
-};
-
-
-/* Get the bm buffer associated with a GL bufferobject:
- */
-dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *obj );
-
-/* Hook the bufferobject implementation into mesa: 
- */
-void intel_bufferobj_init( struct intel_context *intel );
-
-
-
-/* Are the obj->Name tests necessary?  Unfortunately yes, mesa
- * allocates a couple of gl_buffer_object structs statically, and
- * the Name == 0 test is the only way to identify them and avoid
- * casting them erroneously to our structs.
- */
-static inline struct intel_buffer_object *
-intel_buffer_object( struct gl_buffer_object *obj )
-{
-   if (obj->Name)
-      return (struct intel_buffer_object *)obj;
-   else
-      return NULL;
-}
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c
index 406aa93d06..c86daa49f4 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_buffers.c
+++ b/src/mesa/drivers/dri/i965/intel_buffers.c
@@ -1,587 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_batchbuffer.h"
-#include "context.h"
-#include "framebuffer.h"
-#include "macros.h"
-#include "utils.h"
-#include "vblank.h"
-#include "swrast/swrast.h"
-
-GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst,
-				     const drm_clip_rect_t *a,
-				     const drm_clip_rect_t *b )
-{
-   dst->x1 = MAX2(a->x1, b->x1);
-   dst->x2 = MIN2(a->x2, b->x2);
-   dst->y1 = MAX2(a->y1, b->y1);
-   dst->y2 = MIN2(a->y2, b->y2);
-
-   return (dst->x1 <= dst->x2 &&
-	   dst->y1 <= dst->y2);
-}
-
-struct intel_region *intel_drawbuf_region( struct intel_context *intel )
-{
-   switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
-   case BUFFER_BIT_FRONT_LEFT:
-      return intel->front_region;
-   case BUFFER_BIT_BACK_LEFT:
-      return intel->back_region;
-   default:
-      /* Not necessary to fallback - could handle either NONE or
-       * FRONT_AND_BACK cases below.
-       */
-      return NULL;		
-   }
-}
-
-struct intel_region *intel_readbuf_region( struct intel_context *intel )
-{
-   GLcontext *ctx = &intel->ctx;
-
-   /* This will have to change to support EXT_fbo's, but is correct
-    * for now:
-    */
-   switch (ctx->ReadBuffer->_ColorReadBufferIndex) {
-   case BUFFER_FRONT_LEFT:
-      return intel->front_region;
-   case BUFFER_BACK_LEFT:
-      return intel->back_region;
-   default:
-      assert(0);
-      return NULL;
-   }
-}
-
-
-
-static void intelBufferSize(GLframebuffer *buffer,
-			    GLuint *width, 
-			    GLuint *height)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   struct intel_context *intel = intel_context(ctx);
-   /* Need to lock to make sure the driDrawable is uptodate.  This
-    * information is used to resize Mesa's software buffers, so it has
-    * to be correct.
-    */
-   LOCK_HARDWARE(intel);
-   if (intel->driDrawable) {
-      *width = intel->driDrawable->w;
-      *height = intel->driDrawable->h;
-   }
-   else {
-      *width = 0;
-      *height = 0;
-   }
-   UNLOCK_HARDWARE(intel);
-}
-
-
-static void intelSetFrontClipRects( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   intel->numClipRects = dPriv->numClipRects;
-   intel->pClipRects = dPriv->pClipRects;
-   intel->drawX = dPriv->x;
-   intel->drawY = dPriv->y;
-}
-
-
-static void intelSetBackClipRects( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) {
-      intel->numClipRects = dPriv->numClipRects;
-      intel->pClipRects = dPriv->pClipRects;
-      intel->drawX = dPriv->x;
-      intel->drawY = dPriv->y;
-   } else {
-      intel->numClipRects = dPriv->numBackClipRects;
-      intel->pClipRects = dPriv->pBackClipRects;
-      intel->drawX = dPriv->backX;
-      intel->drawY = dPriv->backY;
-      
-      if (dPriv->numBackClipRects == 1 &&
-	  dPriv->x == dPriv->backX &&
-	  dPriv->y == dPriv->backY) {
-      
-	 /* Repeat the calculation of the back cliprect dimensions here
-	  * as early versions of dri.a in the Xserver are incorrect.  Try
-	  * very hard not to restrict future versions of dri.a which
-	  * might eg. allocate truly private back buffers.
-	  */
-	 int x1, y1;
-	 int x2, y2;
-	 
-	 x1 = dPriv->x;
-	 y1 = dPriv->y;      
-	 x2 = dPriv->x + dPriv->w;
-	 y2 = dPriv->y + dPriv->h;
-	 
-	 if (x1 < 0) x1 = 0;
-	 if (y1 < 0) y1 = 0;
-	 if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
-	 if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
-
-	 if (x1 == dPriv->pBackClipRects[0].x1 &&
-	     y1 == dPriv->pBackClipRects[0].y1) {
-
-	    dPriv->pBackClipRects[0].x2 = x2;
-	    dPriv->pBackClipRects[0].y2 = y2;
-	 }
-      }
-   }
-}
-
-
-void intelWindowMoved( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!intel->ctx.DrawBuffer) {
-      intelSetFrontClipRects( intel );
-   }
-   else {
-      switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
-      case BUFFER_BIT_FRONT_LEFT:
-	 intelSetFrontClipRects( intel );
-	 break;
-      case BUFFER_BIT_BACK_LEFT:
-	 intelSetBackClipRects( intel );
-	 break;
-      default:
-	 /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */
-	 intelSetFrontClipRects( intel );
-      }
-   }
-
-   /* Get updated plane info so we sync against the right vblank counter */
-   if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
-      drmI830Sarea *sarea = intel->sarea;
-      drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
-				   .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
-      drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
-				     .x2 = sarea->planeA_x + sarea->planeA_w,
-				     .y2 = sarea->planeA_y + sarea->planeA_h };
-      drm_clip_rect_t planeB_rect = { .x1 = sarea->planeB_x, .y1 = sarea->planeB_y,
-				     .x2 = sarea->planeB_x + sarea->planeB_w,
-				     .y2 = sarea->planeB_y + sarea->planeB_h };
-      GLint areaA = driIntersectArea( drw_rect, planeA_rect );
-      GLint areaB = driIntersectArea( drw_rect, planeB_rect );
-      GLuint flags = dPriv->vblFlags;
-
-      /* Update vblank info
-       */
-      if (areaB > areaA || (areaA == areaB && areaB > 0)) {
-	 flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY;
-      } else {
-	 flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY;
-      }
-
-      /* Check to see if we changed pipes */
-      if (flags != dPriv->vblFlags && dPriv->vblFlags &&
-	  !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ)) {
-	 int64_t count;
-
-	 /*
-	  * Update msc_base from old pipe
-	  */
-	 driDrawableGetMSC32(dPriv->driScreenPriv, dPriv, &count);
-	 dPriv->msc_base = count;
-	 /*
-	  * Then get new vblank_base and vblSeq values
-	  */
-	 dPriv->vblFlags = flags;
-	 driGetCurrentVBlank(dPriv);
-	 dPriv->vblank_base = dPriv->vblSeq;
-      }
-   } else {
-      dPriv->vblFlags &= ~VBLANK_FLAG_SECONDARY;
-   }
-
-   _mesa_resize_framebuffer(&intel->ctx,
-			    (GLframebuffer*)dPriv->driverPrivate,
-			    dPriv->w, dPriv->h);
-
-   /* Set state we know depends on drawable parameters:
-    */
-   {
-      GLcontext *ctx = &intel->ctx;
-
-      if (ctx->Driver.Scissor)
-	 ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			      ctx->Scissor.Width, ctx->Scissor.Height );
-      
-      if (ctx->Driver.DepthRange)
-	 ctx->Driver.DepthRange( ctx, 
-				 ctx->Viewport.Near,
-				 ctx->Viewport.Far );
-
-      intel->NewGLState |= _NEW_SCISSOR;
-   }
-
-   /* This works because the lock is always grabbed before emitting
-    * commands and commands are always flushed prior to releasing
-    * the lock.
-    */
-   intel->NewGLState |= _NEW_WINDOW_POS; 
-}
-
-
-
-/* A true meta version of this would be very simple and additionally
- * machine independent.  Maybe we'll get there one day.
- */
-static void intelClearWithTris(struct intel_context *intel, 
-			       GLbitfield mask)
-{
-   GLcontext *ctx = &intel->ctx;
-   drm_clip_rect_t clear;
-   GLint cx, cy, cw, ch;
-
-   if (INTEL_DEBUG & DEBUG_DRI)
-      _mesa_printf("%s %x\n", __FUNCTION__, mask);
-
-   {
-
-      intel->vtbl.install_meta_state(intel);
-
-      /* Get clear bounds after locking */
-      cx = ctx->DrawBuffer->_Xmin;
-      cy = ctx->DrawBuffer->_Ymin;
-      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
-
-      clear.x1 = cx;
-      clear.y1 = cy;
-      clear.x2 = cx + cw;
-      clear.y2 = cy + ch;
-
-      /* Back and stencil cliprects are the same.  Try and do both
-       * buffers at once:
-       */
-      if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { 
-	 intel->vtbl.meta_draw_region(intel, 
-				      intel->back_region,
-				      intel->depth_region );
-
-	 if (mask & BUFFER_BIT_BACK_LEFT)
-	    intel->vtbl.meta_color_mask(intel, GL_TRUE );
-	 else
-	    intel->vtbl.meta_color_mask(intel, GL_FALSE );
-
-	 if (mask & BUFFER_BIT_STENCIL) 
-	    intel->vtbl.meta_stencil_replace( intel, 
-					      intel->ctx.Stencil.WriteMask[0], 
-					      intel->ctx.Stencil.Clear);
-	 else
-	    intel->vtbl.meta_no_stencil_write(intel);
-
-	 if (mask & BUFFER_BIT_DEPTH) 
-	    intel->vtbl.meta_depth_replace( intel );
-	 else
-	    intel->vtbl.meta_no_depth_write(intel);
-      
-	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
-	  * drawing origin may not be correctly emitted.
-	  */
-	 intel->vtbl.meta_draw_quad(intel, 
-				    clear.x1, clear.x2, 
-				    clear.y1, clear.y2, 
-				    intel->ctx.Depth.Clear,
-				    intel->clear_chan[0], 
-				    intel->clear_chan[1], 
-				    intel->clear_chan[2], 
-				    intel->clear_chan[3], 
-				    0, 0, 0, 0);
-      }
-
-      /* Front may have different cliprects: 
-       */
-      if (mask & BUFFER_BIT_FRONT_LEFT) {
-	 intel->vtbl.meta_no_depth_write(intel);
-	 intel->vtbl.meta_no_stencil_write(intel);
-	 intel->vtbl.meta_color_mask(intel, GL_TRUE );
-	 intel->vtbl.meta_draw_region(intel, 
-				      intel->front_region,
-				      intel->depth_region);
-
-	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
-	  * drawing origin may not be correctly emitted.
-	  */
-	 intel->vtbl.meta_draw_quad(intel, 
-				    clear.x1, clear.x2, 
-				    clear.y1, clear.y2, 
-				    0,
-				    intel->clear_chan[0], 
-				    intel->clear_chan[1], 
-				    intel->clear_chan[2], 
-				    intel->clear_chan[3], 
-				    0, 0, 0, 0);
-      }
-
-      intel->vtbl.leave_meta_state( intel );
-   }
-}
-
-
-
-
-
-static void intelClear(GLcontext *ctx, GLbitfield mask)
-{
-   struct intel_context *intel = intel_context( ctx );
-   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask);
-   GLbitfield tri_mask = 0;
-   GLbitfield blit_mask = 0;
-   GLbitfield swrast_mask = 0;
-
-   if (INTEL_DEBUG & DEBUG_DRI)
-      fprintf(stderr, "%s %x\n", __FUNCTION__, mask);
-
-
-   if (mask & BUFFER_BIT_FRONT_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_FRONT_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_FRONT_LEFT;
-      }
-   }
-
-   if (mask & BUFFER_BIT_BACK_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_BACK_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_BACK_LEFT;
-      }
-   }
-
-
-   if (mask & BUFFER_BIT_STENCIL) {
-      if (!intel->hw_stencil) {
-	 swrast_mask |= BUFFER_BIT_STENCIL;
-      }
-      else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff ||
-	       intel->depth_region->tiled) {
-	 tri_mask |= BUFFER_BIT_STENCIL;
-      } 
-      else {
-	 blit_mask |= BUFFER_BIT_STENCIL;
-      }
-   }
-
-   /* Do depth with stencil if possible to avoid 2nd pass over the
-    * same buffer.
-    */
-   if (mask & BUFFER_BIT_DEPTH) {
-      if ((tri_mask & BUFFER_BIT_STENCIL) ||
-	  intel->depth_region->tiled)
-	 tri_mask |= BUFFER_BIT_DEPTH;
-      else 
-	 blit_mask |= BUFFER_BIT_DEPTH;
-   }
-
-   swrast_mask |= (mask & BUFFER_BIT_ACCUM);
-
-   intelFlush( ctx );
-
-   if (blit_mask)
-      intelClearWithBlit( ctx, blit_mask );
-
-   if (tri_mask) 
-      intelClearWithTris( intel, tri_mask );
-
-   if (swrast_mask)
-      _swrast_Clear( ctx, swrast_mask );
-}
-
-
-
-
-
-
-
-/* Flip the front & back buffers
- */
-static void intelPageFlip( const __DRIdrawablePrivate *dPriv )
-{
-#if 0
-   struct intel_context *intel;
-   int tmp, ret;
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-
-   if (dPriv->pClipRects) {
-      *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0];
-      intel->sarea->nbox = 1;
-   }
-
-   ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); 
-   if (ret) {
-      fprintf(stderr, "%s: %d\n", __FUNCTION__, ret);
-      UNLOCK_HARDWARE( intel );
-      exit(1);
-   }
-
-   tmp = intel->sarea->last_enqueue;
-   intelRefillBatchLocked( intel );
-   UNLOCK_HARDWARE( intel );
-
-
-   intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer );
-#endif
-}
-
-
-void intelSwapBuffers( __DRIdrawablePrivate *dPriv )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      struct intel_context *intel;
-      GLcontext *ctx;
-      intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-      ctx = &intel->ctx;
-      if (ctx->Visual.doubleBufferMode) {
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */
-	    intelPageFlip( dPriv );
-	 } else {
-	    intelCopyBuffer( dPriv, NULL );
-	 }
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-void intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
-			 int x, int y, int w, int h )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      struct intel_context *intel = dPriv->driContextPriv->driverPrivate;
-      GLcontext *ctx = &intel->ctx;
-
-      if (ctx->Visual.doubleBufferMode) {
-	 drm_clip_rect_t rect;
-	 rect.x1 = x + dPriv->x;
-	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
-	 rect.x2 = rect.x1 + w;
-	 rect.y2 = rect.y1 + h;
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 intelCopyBuffer( dPriv, &rect );
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-
-static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
-{
-   struct intel_context *intel = intel_context(ctx);
-   int front = 0;
- 
-   if (!ctx->DrawBuffer)
-      return;
-
-   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
-   case BUFFER_BIT_FRONT_LEFT:
-      front = 1;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   case BUFFER_BIT_BACK_LEFT:
-      front = 0;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   default:
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return;
-   }
-
-   if ( intel->sarea->pf_current_page == 1 ) 
-      front ^= 1;
-   
-   intelSetFrontClipRects( intel );
-
-
-   if (front) {
-      if (intel->draw_region != intel->front_region) {
-	 intel_region_release(intel, &intel->draw_region);
-	 intel_region_reference(&intel->draw_region, intel->front_region);
-      }
-   } else {
-      if (intel->draw_region != intel->back_region) {
-	 intel_region_release(intel, &intel->draw_region);
-	 intel_region_reference(&intel->draw_region, intel->back_region);
-      }
-   }
-
-   intel->vtbl.set_draw_region( intel, 
-				intel->draw_region,
-				intel->depth_region);
-}
-
-static void intelReadBuffer( GLcontext *ctx, GLenum mode )
-{
-   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-}
-
-
-
-void intelInitBufferFuncs( struct dd_function_table *functions )
-{
-   functions->Clear = intelClear;
-   functions->GetBufferSize = intelBufferSize;
-   functions->DrawBuffer = intelDrawBuffer;
-   functions->ReadBuffer = intelReadBuffer;
-}
+../intel/intel_buffers.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c
index fb95c492eb..0866c5ccc6 100644
--- a/src/mesa/drivers/dri/i965/intel_context.c
+++ b/src/mesa/drivers/dri/i965/intel_context.c
@@ -57,10 +57,13 @@
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_regions.h"
+#include "intel_buffers.h"
 #include "intel_buffer_objects.h"
 #include "intel_decode.h"
+#include "intel_fbo.h"
 #include "intel_bufmgr_ttm.h"
 
+#include "drirenderbuffer.h"
 #include "i915_drm.h"
 
 #include "utils.h"
@@ -83,8 +86,10 @@ int INTEL_DEBUG = (0);
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_cull_vertex
 #define need_GL_EXT_fog_coord
+#define need_GL_EXT_framebuffer_object
 #define need_GL_EXT_multi_draw_arrays
 #define need_GL_EXT_secondary_color
+#define need_GL_ATI_separate_stencil
 #define need_GL_EXT_point_parameters
 #define need_GL_VERSION_2_0
 #define need_GL_VERSION_2_1
@@ -184,7 +189,14 @@ const struct dri_extension card_extensions[] =
     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
     { "GL_EXT_multi_draw_arrays",          GL_EXT_multi_draw_arrays_functions },
     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_ATI_separate_stencil",           GL_ATI_separate_stencil_functions },
     { "GL_EXT_stencil_wrap",               NULL },
+    /* Do not enable this extension.  It conflicts with GL_ATI_separate_stencil
+     * and 2.0's separate stencil, because mesa's computed _TestTwoSide will
+     * only reflect whether it's enabled through this extension, even if the
+     * application is using the other interfaces.
+     */
+/*{ "GL_EXT_stencil_two_side",           GL_EXT_stencil_two_side_functions },*/
     { "GL_EXT_texture_edge_clamp",         NULL },
     { "GL_EXT_texture_env_combine",        NULL },
     { "GL_EXT_texture_env_dot3",           NULL },
@@ -202,20 +214,38 @@ const struct dri_extension card_extensions[] =
     { "GL_ARB_shader_objects",             GL_ARB_shader_objects_functions},
     { "GL_ARB_vertex_shader",              GL_ARB_vertex_shader_functions},
     { "GL_ARB_fragment_shader",            NULL },
-    /* XXX not implement yet, to compile builtin glsl lib */
     { "GL_ARB_draw_buffers",               NULL },
     { NULL,                                NULL }
 };
 
+const struct dri_extension ttm_extensions[] = {
+   {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
+   {"GL_ARB_pixel_buffer_object", NULL},
+   {NULL, NULL}
+};
+
 const struct dri_extension arb_oc_extension = 
     { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions};
 
+/**
+ * Initializes potential list of extensions if ctx == NULL, or actually enables
+ * extensions for a context.
+ */
 void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
-{	     
-	struct intel_context *intel = ctx?intel_context(ctx):NULL;
-	driInitExtensions(ctx, card_extensions, enable_imaging);
-	if (!ctx || intel->intelScreen->drmMinor >= 8)
-		driInitSingleExtension (ctx, &arb_oc_extension);
+{
+   struct intel_context *intel = ctx?intel_context(ctx):NULL;
+
+   /* Disable imaging extension until convolution is working in teximage paths.
+    */
+   enable_imaging = GL_FALSE;
+
+   driInitExtensions(ctx, card_extensions, enable_imaging);
+
+   if (intel == NULL || intel->ttm)
+      driInitExtensions(ctx, ttm_extensions, GL_FALSE);
+
+   if (intel == NULL || intel->intelScreen->drmMinor >= 8)
+      driInitSingleExtension(ctx, &arb_oc_extension);
 }
 
 static const struct dri_debug_control debug_control[] =
@@ -244,6 +274,7 @@ static const struct dri_debug_control debug_control[] =
     { "blit",  DEBUG_BLIT},
     { "mip",   DEBUG_MIPTREE},
     { "reg",   DEBUG_REGION},
+    { "fbo",   DEBUG_FBO },
     { NULL,    0 }
 };
 
@@ -506,17 +537,10 @@ GLboolean intelInitContext( struct intel_context *intel,
    switch(mesaVis->depthBits) {
    case 0:			/* what to do in this case? */
    case 16:
-      intel->depth_scale = 1.0/0xffff;
       intel->polygon_offset_scale = 1.0/0xffff;
-      intel->depth_clear_mask = ~0;
-      intel->ClearDepth = 0xffff;
       break;
    case 24:
-      intel->depth_scale = 1.0/0xffffff;
       intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */
-      intel->depth_clear_mask = 0x00ffffff;
-      intel->stencil_clear_mask = 0xff000000;
-      intel->ClearDepth = 0x00ffffff;
       break;
    default:
       assert(0); 
@@ -534,12 +558,14 @@ GLboolean intelInitContext( struct intel_context *intel,
 
    INTEL_DEBUG  = driParseDebugString( getenv( "INTEL_DEBUG" ),
 				       debug_control );
-   if (!intel->ttm && (INTEL_DEBUG & DEBUG_BUFMGR))
-      dri_bufmgr_fake_set_debug(intel->bufmgr, GL_TRUE);
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
 
    intel_recreate_static_regions(intel);
 
    intel_bufferobj_init( intel );
+   intel_fbo_init( intel );
+
    intel->batch = intel_batchbuffer_alloc( intel );
    intel->last_swap_fence = NULL;
    intel->first_swap_fence = NULL;
@@ -612,16 +638,6 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
 	  */
       }
 
-      /* Free the regions created to describe front/back/depth
-       * buffers:
-       */
-#if 0
-      intel_region_release(intel, &intel->front_region);
-      intel_region_release(intel, &intel->back_region);
-      intel_region_release(intel, &intel->depth_region);
-      intel_region_release(intel, &intel->draw_region);
-#endif
-
       /* free the Mesa context */
       intel->ctx.VertexProgram.Current = NULL;
       intel->ctx.FragmentProgram.Current = NULL;
@@ -642,7 +658,44 @@ GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
 {
 
    if (driContextPriv) {
-      struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate;
+      struct intel_context *intel =
+	 (struct intel_context *) driContextPriv->driverPrivate;
+      struct intel_framebuffer *intel_fb =
+	 (struct intel_framebuffer *) driDrawPriv->driverPrivate;
+      GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate;
+
+      /* XXX FBO temporary fix-ups! */
+      /* if the renderbuffers don't have regions, init them from the context.
+       * They will be unreferenced when the renderbuffer is destroyed.
+       */
+      {
+         struct intel_renderbuffer *irbDepth
+            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+         struct intel_renderbuffer *irbStencil
+            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+
+         if (intel_fb->color_rb[0] && !intel_fb->color_rb[0]->region) {
+            intel_region_reference(&intel_fb->color_rb[0]->region,
+				   intel->front_region);
+         }
+         if (intel_fb->color_rb[1] && !intel_fb->color_rb[1]->region) {
+            intel_region_reference(&intel_fb->color_rb[1]->region,
+				   intel->back_region);
+         }
+         if (intel_fb->color_rb[2] && !intel_fb->color_rb[2]->region) {
+            intel_region_reference(&intel_fb->color_rb[2]->region,
+				   intel->third_region);
+         }
+         if (irbDepth && !irbDepth->region) {
+            intel_region_reference(&irbDepth->region, intel->depth_region);
+         }
+         if (irbStencil && !irbStencil->region) {
+            intel_region_reference(&irbStencil->region, intel->depth_region);
+         }
+      }
+
+      /* set GLframebuffer size to match window, if needed */
+      driUpdateFramebufferSize(&intel->ctx, driDrawPriv);
 
       if (intel->driReadDrawable != driReadPriv) {
           intel->driReadDrawable = driReadPriv;
@@ -662,10 +715,10 @@ GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
       }
 
       _mesa_make_current(&intel->ctx,
-			 (GLframebuffer *) driDrawPriv->driverPrivate,
-			 (GLframebuffer *) driReadPriv->driverPrivate);
+			 &intel_fb->Base,
+			 readFb);
 
-      intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] );
+      intel_draw_buffer(&intel->ctx, &intel_fb->Base);
    } else {
       _mesa_make_current(NULL, NULL, NULL);
    }
diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h
index 17612acdaf..52fee68d1a 100644
--- a/src/mesa/drivers/dri/i965/intel_context.h
+++ b/src/mesa/drivers/dri/i965/intel_context.h
@@ -33,8 +33,10 @@
 #include "mtypes.h"
 #include "drm.h"
 #include "texmem.h"
+#include "dri_bufmgr.h"
 
 #include "intel_screen.h"
+#include "intel_tex_obj.h"
 #include "i830_common.h"
 #include "tnl/t_vertex.h"
 
@@ -59,32 +61,15 @@ typedef void (*intel_point_func)(struct intel_context *, intelVertex *);
 #define INTEL_FALLBACK_USER		 0x4
 #define INTEL_FALLBACK_RENDERMODE	 0x8
 #define INTEL_FALLBACK_TEXTURE   	 0x10
+#define INTEL_FALLBACK_DEPTH_BUFFER	 0x20
+#define INTEL_FALLBACK_STENCIL_BUFFER	 0x40
 
 extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode );
 #define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
 
-
-
-struct intel_texture_object
-{
-   struct gl_texture_object base; /* The "parent" object */
-
-   /* The mipmap tree must include at least these levels once
-    * validated:
-    */
-   GLuint firstLevel;
-   GLuint lastLevel;
-
-   GLuint dirty_images[6];
-   GLuint dirty;
-
-   /* On validation any active images held in main memory or in other
-    * regions will be copied to this region and the old storage freed.
-    */
-   struct intel_mipmap_tree *mt;
-};
-
-
+#define INTEL_WRITE_PART  0x1
+#define INTEL_WRITE_FULL  0x2
+#define INTEL_READ        0x4
 
 struct intel_context
 {
@@ -142,12 +127,11 @@ struct intel_context
       void (*meta_frame_buffer_texture)( struct intel_context *intel,
 					 GLint xoff, GLint yoff );
 
-      void (*meta_draw_quad)(struct intel_context *intel, 
+      void (*meta_draw_quad)(struct intel_context *intel,
 			     GLfloat x0, GLfloat x1,
-			     GLfloat y0, GLfloat y1, 
+			     GLfloat y0, GLfloat y1,
 			     GLfloat z,
-			     GLubyte red, GLubyte green,
-			     GLubyte blue, GLubyte alpha,
+			     GLuint color, /* ARGB32 */
 			     GLfloat s0, GLfloat s1,
 			     GLfloat t0, GLfloat t1);
 
@@ -181,13 +165,10 @@ struct intel_context
    unsigned batch_id;
 
    GLubyte clear_chan[4];
-   GLuint ClearColor;
-   GLuint ClearDepth;
+   GLuint ClearColor565;
+   GLuint ClearColor8888;
 
-   GLfloat depth_scale;
    GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
-   GLuint depth_clear_mask;
-   GLuint stencil_clear_mask;
 
    GLboolean hw_stencil;
    GLboolean hw_stipple;
@@ -211,6 +192,7 @@ struct intel_context
    GLuint numClipRects;		/* cliprects for that buffer */
    drm_clip_rect_t *pClipRects;
    struct gl_texture_object *frame_buffer_texobj;
+   drm_clip_rect_t fboRect;     /**< cliprect for FBO rendering */
 
    GLboolean scissor;
    drm_clip_rect_t draw_rect;
@@ -306,32 +288,6 @@ static inline void * __memcpy(void * to, const void * from, size_t n)
 #endif
 
 
-/* The system memcpy (at least on ubuntu 5.10) has problems copying
- * to agp (writecombined) memory from a source which isn't 64-byte
- * aligned - there is a 4x performance falloff.
- *
- * The x86 __memcpy is immune to this but is slightly slower
- * (10%-ish) than the system memcpy.
- *
- * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
- * isn't much faster than x86_memcpy for agp copies.
- * 
- * TODO: switch dynamically.
- */
-static inline void *do_memcpy( void *dest, const void *src, size_t n )
-{
-   if ( (((unsigned long)src) & 63) ||
-	(((unsigned long)dest) & 63)) {
-      return  __memcpy(dest, src, n);	
-   }
-   else
-      return memcpy(dest, src, n);
-}
-
-
-
-
-
 /* ================================================================
  * Debugging:
  */
@@ -361,6 +317,7 @@ extern int INTEL_DEBUG;
 #define DEBUG_BLIT	0x200000
 #define DEBUG_REGION	0x400000
 #define DEBUG_MIPTREE	0x800000
+#define DEBUG_FBO	0x1000000
 
 #define DBG(...) do {						\
 	if (INTEL_DEBUG & FILE_DEBUG_FLAG)			\
@@ -493,15 +450,5 @@ static inline struct intel_context *intel_context( GLcontext *ctx )
    return (struct intel_context *)ctx;
 }
 
-static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj )
-{
-   return (struct intel_texture_object *)obj;
-}
-
-static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img )
-{
-   return (struct intel_texture_image *)img;
-}
-
 #endif
 
diff --git a/src/mesa/drivers/dri/i965/intel_depthstencil.c b/src/mesa/drivers/dri/i965/intel_depthstencil.c
new file mode 120000
index 0000000000..4ac4ae690a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_depthstencil.c
@@ -0,0 +1 @@
+../intel/intel_depthstencil.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
new file mode 120000
index 0000000000..a19f86dcc5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -0,0 +1 @@
+../intel/intel_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.c b/src/mesa/drivers/dri/i965/intel_ioctl.c
index 91677c0c52..ebf5e3ae83 100644
--- a/src/mesa/drivers/dri/i965/intel_ioctl.c
+++ b/src/mesa/drivers/dri/i965/intel_ioctl.c
@@ -45,17 +45,6 @@
 #include "intel_bufmgr_ttm.h"
 #include "i915_drm.h"
 
-static void intelWaitIdleLocked( struct intel_context *intel )
-{
-   unsigned int fence;
-
-   if (INTEL_DEBUG & DEBUG_SYNC)
-      fprintf(stderr, "waiting for idle\n");
-
-   fence = intelEmitIrqLocked(intel);
-   intelWaitIrq(intel, fence);
-}
-
 int intelEmitIrqLocked( struct intel_context *intel )
 {
    int seq = 1;
@@ -185,7 +174,7 @@ intel_exec_ioctl(struct intel_context *intel,
    execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
 			(((GLuint) intel->drawY) << 16));
 
-   execbuf.ops_list = (unsigned)start; // TODO
+   execbuf.ops_list = (unsigned long)start; // TODO
    execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
 
    if (intel->no_hw)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index edca84c64e..242fed0b6a 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1,260 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "intel_context.h"
-#include "intel_mipmap_tree.h"
-#include "intel_regions.h"
-#include "dri_bufmgr.h"
-#include "enums.h"
-#include "imports.h"
-
-#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-
-static GLenum target_to_target( GLenum target )
-{
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      return GL_TEXTURE_CUBE_MAP_ARB;
-   default:
-      return target;
-   }
-}
-
-struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
-						GLenum target,
-						GLenum internal_format,
-						GLuint first_level,
-						GLuint last_level,
-						GLuint width0,
-						GLuint height0,
-						GLuint depth0,
-						GLuint cpp,
-						GLboolean compressed)
-{
-   GLboolean ok;
-   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__,
-		   _mesa_lookup_enum_by_nr(target),
-		   _mesa_lookup_enum_by_nr(internal_format),
-		   first_level,
-		   last_level);
-
-   mt->target = target_to_target(target);
-   mt->internal_format = internal_format;
-   mt->first_level = first_level;
-   mt->last_level = last_level;
-   mt->width0 = width0;
-   mt->height0 = height0;
-   mt->depth0 = depth0;
-   mt->cpp = cpp;
-   mt->compressed = compressed;
-
-   switch (intel->intelScreen->deviceID) {
-#if 0
-   case PCI_CHIP_I945_G:
-      ok = i945_miptree_layout( mt );
-      break;
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-      ok = i915_miptree_layout( mt );
-      break;
-#endif
-   default:
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 _mesa_printf("assuming BRW texture layouts\n");
-      ok = brw_miptree_layout( mt );
-      break;
-   }
-
-   if (ok)
-      mt->region = intel_region_alloc( intel, 
-				       mt->cpp,
-				       mt->pitch, 
-				       mt->total_height );
-
-   if (!mt->region) {
-      free(mt);
-      return NULL;
-   }
-
-   return mt;
-}
-
-
-
-void intel_miptree_destroy( struct intel_context *intel,
-			    struct intel_mipmap_tree *mt )
-{
-   if (mt) {
-      GLuint i;
-
-      intel_region_release(intel, &(mt->region));
-
-      for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
-	 if (mt->level[i].image_offset)
-	    free(mt->level[i].image_offset);
-
-      free(mt);
-   }
-}
-
-
-
-
-void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-				  GLuint level,
-				  GLuint nr_images,
-				  GLuint x, GLuint y,
-				  GLuint w, GLuint h, GLuint d)
-{
-   mt->level[level].width = w;
-   mt->level[level].height = h;
-   mt->level[level].depth = d;
-   mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
-   mt->level[level].nr_images = nr_images;
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h, 
-		   mt->level[level].level_offset);
-
-   /* Not sure when this would happen, but anyway: 
-    */
-   if (mt->level[level].image_offset) {
-      free(mt->level[level].image_offset);
-      mt->level[level].image_offset = NULL;
-   }
-
-   if (nr_images > 1) {
-      mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint));
-      mt->level[level].image_offset[0] = 0;
-   }
-}
-
-
-
-void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-				    GLuint level,
-				    GLuint img,
-				    GLuint x, GLuint y)
-{
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y);
-
-   if (img == 0)
-      assert(x == 0 && y == 0);
-
-   if (img > 0)
-      mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
-}
-
-
-/* Although we use the image_offset[] array to store relative offsets
- * to cube faces, Mesa doesn't know anything about this and expects
- * each cube face to be treated as a separate image.
- *
- * These functions present that view to mesa:
- */
-const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
-					  GLuint level)
-{
-   static const GLuint zero = 0;
-
-   if (mt->target != GL_TEXTURE_3D ||
-       mt->level[level].nr_images == 1)
-      return &zero;
-   else
-      return mt->level[level].image_offset;
-}
-
-
-GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
-				  GLuint face,
-				  GLuint level)
-{
-   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
-      return (mt->level[level].level_offset +
-	      mt->level[level].image_offset[face]);
-   else
-      return mt->level[level].level_offset;
-}
-
-
-
-
-
-extern GLuint intel_compressed_alignment(GLenum);
-/* Upload data for a particular image.
- */
-GLboolean intel_miptree_image_data(struct intel_context *intel, 
-				   struct intel_mipmap_tree *dst,
-				   GLuint face,
-				   GLuint level,
-				   const void *src, 
-				   GLuint src_row_pitch,
-				   GLuint src_image_pitch)
-{
-   GLuint depth = dst->level[level].depth;
-   GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
-   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
-   GLuint i;
-   GLuint width, height, alignment;
-
-   width = dst->level[level].width;
-   height = dst->level[level].height;
-
-   if (dst->compressed) {
-       alignment = intel_compressed_alignment(dst->internal_format);
-       src_row_pitch = ALIGN(src_row_pitch, alignment);
-       width = ALIGN(width, alignment);
-       height = (height + 3) / 4;
-   }
-
-   DBG("%s\n", __FUNCTION__);
-   for (i = 0; i < depth; i++) {
-      if (!intel_region_data(intel,
-			     dst->region, 
-			     dst_offset + dst_depth_offset[i],
-			     0,
-			     0,
-			     src,
-			     src_row_pitch,
-			     0, 0,	/* source x,y */
-			     width,
-			     height))
-	 return GL_FALSE;
-      src += src_image_pitch;
-   }
-   return GL_TRUE;
-}
-
+../intel/intel_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
deleted file mode 100644
index dbd7167b77..0000000000
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_MIPMAP_TREE_H
-#define INTEL_MIPMAP_TREE_H
-
-#include "intel_regions.h"
-
-/* A layer on top of the intel_regions code which adds:
- *
- * - Code to size and layout a region to hold a set of mipmaps.
- * - Query to determine if a new image fits in an existing tree.
- *
- * The fixed mipmap layout of intel hardware where one offset
- * specifies the position of all images in a mipmap hierachy
- * complicates the implementation of GL texture image commands,
- * compared to hardware where each image is specified with an
- * independent offset.
- *
- * In an ideal world, each texture object would be associated with a
- * single bufmgr buffer or 2d intel_region, and all the images within
- * the texture object would slot into the tree as they arrive.  The
- * reality can be a little messier, as images can arrive from the user
- * with sizes that don't fit in the existing tree, or in an order
- * where the tree layout cannot be guessed immediately.  
- * 
- * This structure encodes an idealized mipmap tree.  The GL image
- * commands build these where possible, otherwise store the images in
- * temporary system buffers.
- */
-
-
-struct intel_mipmap_level {
-   GLuint level_offset;
-   GLuint width;
-   GLuint height;
-   GLuint depth;
-   GLuint nr_images;
-
-   /* Explicitly store the offset of each image for each cube face or
-    * depth value.  Pretty much have to accept that hardware formats
-    * are going to be so diverse that there is no unified way to
-    * compute the offsets of depth/cube images within a mipmap level,
-    * so have to store them as a lookup table:
-    */
-   GLuint *image_offset;
-};
-
-struct intel_mipmap_tree {
-   /* Effectively the key:
-    */
-   GLenum target;
-   GLenum internal_format;
-
-   GLuint first_level;
-   GLuint last_level;
-
-   GLuint width0, height0, depth0;
-   GLuint cpp;
-   GLboolean compressed;
-
-   /* Derived from the above:
-    */   
-   GLuint pitch;
-   GLuint depth_pitch;		/* per-image on i945? */
-   GLuint total_height;
-   
-   /* Includes image offset tables:
-    */
-   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
-
-   /* The data is held here:
-    */
-   struct intel_region *region;
-
-   /* These are also refcounted:
-    */
-   GLuint refcount;
-};
-
-
-
-struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
-						GLenum target,
-						GLenum internal_format,
-						GLuint first_level,
-						GLuint last_level,
-						GLuint width0,
-						GLuint height0,
-						GLuint depth0,
-						GLuint cpp,
-						GLboolean compressed);
-
-void intel_miptree_destroy( struct intel_context *intel,
-			    struct intel_mipmap_tree *mt );
-
-
-/* Return the linear offset of an image relative to the start of the
- * tree:
- */
-GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt,
-				   GLuint face,
-				   GLuint level );
-
-/* Return pointers to each 2d slice within an image.  Indexed by depth
- * value.
- */
-const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
-					  GLuint level);
-
-
-void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-				  GLuint level,
-				  GLuint nr_images,
-				  GLuint x, GLuint y,
-				  GLuint w, GLuint h, GLuint d);
-
-void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-				    GLuint level,
-				    GLuint img,
-				    GLuint x, GLuint y);
-
-
-/* Upload an image into a tree
- */
-GLboolean intel_miptree_image_data(struct intel_context *intel, 
-				   struct intel_mipmap_tree *dst,
-				   GLuint face,
-				   GLuint level,
-				   const void *src, 
-				   GLuint src_row_pitch,
-				   GLuint src_image_pitch);
-
-/* i915_mipmap_tree.c:
- */
-GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt );
-GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt );
-GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt );
-
-
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
index 3777422619..9085c7b039 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -1,357 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portionsalloc
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "enums.h"
-#include "image.h"
-#include "colormac.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "bufferobj.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_buffer_objects.h"
-
-
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-
-/* Unlike the other intel_pixel_* functions, the expectation here is
- * that the incoming data is not in a PBO.  With the XY_TEXT blit
- * method, there's no benefit haveing it in a PBO, but we could
- * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
- * PBO bitmaps.  I think they are probably pretty rare though - I
- * wonder if Xgl uses them?
- */
-static const GLubyte *map_pbo( GLcontext *ctx,
-			       GLsizei width, GLsizei height,
-			       const struct gl_pixelstore_attrib *unpack,
-			       const GLubyte *bitmap )
-{
-   GLubyte *buf;
-
-   if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
-				  GL_COLOR_INDEX, GL_BITMAP,
-				  (GLvoid *) bitmap)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
-      return NULL;
-   }
-
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-					   GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
-   if (!buf) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
-      return NULL;
-   }
-
-   return ADD_POINTERS(buf, bitmap);
-}
-
-static GLboolean test_bit( const GLubyte *src,
-			    GLuint bit )
-{
-   return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
-}
-
-static void set_bit( GLubyte *dest,
-			  GLuint bit )
-{
-   dest[bit/8] |= 1 << (bit % 8);
-}
-
-/* Extract a rectangle's worth of data from the bitmap.  Called
- * per-cliprect.
- */
-static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
-			      const struct gl_pixelstore_attrib *unpack,
-			      const GLubyte *bitmap,
-			      GLuint x, GLuint y, 
-			      GLuint w, GLuint h,
-			      GLubyte *dest,
-			      GLuint row_align,
-			      GLboolean invert)
-{
-   GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
-   GLuint mask = unpack->LsbFirst ? 0 : 7;
-   GLuint bit = 0;
-   GLint row, col;
-   GLint first, last;
-   GLint incr;
-   GLuint count = 0;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
-		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
-
-   if (invert) {
-      first = h-1;
-      last = 0;
-      incr = -1;
-   }
-   else {
-      first = 0;
-      last = h-1;
-      incr = 1;
-   }
-
-   /* Require that dest be pre-zero'd.
-    */
-   for (row = first; row != (last+incr); row += incr) {
-      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, 
-						    width, height, 
-						    GL_COLOR_INDEX, GL_BITMAP, 
-						    y + row, x);
-
-      for (col = 0; col < w; col++, bit++) {
-	 if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
-	    set_bit(dest, bit ^ 7);
-	    count++;
-	 }
-      }
-
-      if (row_align)
-	 bit = ALIGN(bit, row_align);
-   }
-
-   return count;
-}
-
-
-
-
-/*
- * Render a bitmap.
- */
-static GLboolean
-do_blit_bitmap( GLcontext *ctx, 
-		GLint dstx, GLint dsty,
-		GLsizei width, GLsizei height,
-		const struct gl_pixelstore_attrib *unpack,
-		const GLubyte *bitmap )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   GLfloat tmpColor[4];
-
-   union {
-      GLuint ui;
-      GLubyte ub[4];
-   } color;
-
-   if (!dst)
-       return GL_FALSE;
-
-   if (unpack->BufferObj->Name) {
-      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
-      if (bitmap == NULL)
-	 return GL_TRUE;	/* even though this is an error, we're done */
-   }
-
-   COPY_4V(tmpColor, ctx->Current.RasterColor);
-
-   if (NEED_SECONDARY_COLOR(ctx)) {
-       ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
-   }
-
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]);
-
-   /* Does zoom apply to bitmaps?
-    */
-   if (!intel_check_blit_fragment_ops(ctx) ||
-       ctx->Pixel.ZoomX != 1.0F || 
-       ctx->Pixel.ZoomY != 1.0F)
-      return GL_FALSE;
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t dest_rect;
-      GLint nbox = dPriv->numClipRects;
-      GLint srcx = 0, srcy = 0;
-      GLint orig_screen_x1, orig_screen_y2;
-      GLuint i;
-
-
-      orig_screen_x1 = dPriv->x + dstx;
-      orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
-
-      /* Do scissoring in GL coordinates:
-       */
-      if (ctx->Scissor.Enabled)
-      {
-	 GLint x = ctx->Scissor.X;
-	 GLint y = ctx->Scissor.Y;
-	 GLuint w = ctx->Scissor.Width;
-	 GLuint h = ctx->Scissor.Height;
-
-         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
-            goto out;
-      }
-
-      /* Convert from GL to hardware coordinates:
-       */
-      dsty = dPriv->y + (dPriv->h - dsty - height);  
-      dstx = dPriv->x + dstx;
-
-      dest_rect.x1 = dstx < 0 ? 0 : dstx;
-      dest_rect.y1 = dsty < 0 ? 0 : dsty;
-      dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width;
-      dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height;
-
-      for (i = 0; i < nbox; i++) {
-         drm_clip_rect_t rect;
-	 int box_w, box_h;
-	 GLint px, py;
-	 GLuint stipple[32];  
-
-         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
-            continue;
-
-	 /* Now go back to GL coordinates to figure out what subset of
-	  * the bitmap we are uploading for this cliprect:
-	  */
-	 box_w = rect.x2 - rect.x1;
-	 box_h = rect.y2 - rect.y1;
-	 srcx = rect.x1 - orig_screen_x1;
-	 srcy = orig_screen_y2 - rect.y2;
-
-
-#define DY 32
-#define DX 32
-
-	 /* Then, finally, chop it all into chunks that can be
-	  * digested by hardware:
-	  */
-	 for (py = 0; py < box_h; py += DY) { 
-	    for (px = 0; px < box_w; px += DX) { 
-	       int h = MIN2(DY, box_h - py);
-	       int w = MIN2(DX, box_w - px); 
-	       GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
-	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
-		  ctx->Color.LogicOp : GL_COPY;
-
-	       assert(sz <= sizeof(stipple));
-	       memset(stipple, 0, sz);
-
-	       /* May need to adjust this when padding has been introduced in
-		* sz above:
-		*/
-	       if (get_bitmap_rect(width, height, unpack, 
-				   bitmap,
-				   srcx + px, srcy + py, w, h,
-				   (GLubyte *)stipple,
-				   8,
-				   GL_TRUE) == 0)
-		  continue;
-
-	       /* 
-		*/
-	       intelEmitImmediateColorExpandBlit( intel,
-						  dst->cpp,
-						  (GLubyte *)stipple, 
-						  sz,
-						  color.ui,
-						  dst->pitch,
-						  dst->buffer,
-						  0,
-						  dst->tiled,
-						  rect.x1 + px,
-						  rect.y2 - (py + h),
-						  w, h,
-						  logic_op);
-	    } 
-	 } 
-      }
-      intel->need_flush = GL_TRUE;
-   out:
-      intel_batchbuffer_flush(intel->batch);
-   }
-   UNLOCK_HARDWARE(intel);
-
-
-   if (unpack->BufferObj->Name) {
-      /* done with PBO so unmap it now */
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
-   }
-
-   return GL_TRUE;
-}
-
-
-
-
-
-/* There are a large number of possible ways to implement bitmap on
- * this hardware, most of them have some sort of drawback.  Here are a
- * few that spring to mind:
- * 
- * Blit:
- *    - XY_MONO_SRC_BLT_CMD
- *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
- *    - XY_TEXT_BLT
- *    - XY_TEXT_IMMEDIATE_BLT
- *         - blit per cliprect, subject to maximum immediate data size.
- *    - XY_COLOR_BLT 
- *         - per pixel or run of pixels
- *    - XY_PIXEL_BLT
- *         - good for sparse bitmaps
- *
- * 3D engine:
- *    - Point per pixel
- *    - Translate bitmap to an alpha texture and render as a quad
- *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
- */
-void
-intelBitmap(GLcontext * ctx,
-	    GLint x, GLint y,
-	    GLsizei width, GLsizei height,
-	    const struct gl_pixelstore_attrib *unpack,
-	    const GLubyte * pixels)
-{
-   if (do_blit_bitmap(ctx, x, y, width, height,
-                          unpack, pixels))
-      return;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
-
-   _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
-}
+../intel/intel_pixel_bitmap.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
index dc2e266a3d..7583906dd0 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
@@ -190,7 +190,7 @@ do_texture_copypixels(GLcontext * ctx,
 				  dstx, dstx + width,
 				  dsty, dsty + height,
 				  ctx->Current.RasterPos[ 2 ],
-				  0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 );
+				  0, 0.0, 0.0, 0.0, 0.0 );
    
    intel->vtbl.leave_meta_state( intel );
    
diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c
index feaecbc754..89b2f15c10 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_regions.c
+++ b/src/mesa/drivers/dri/i965/intel_regions.c
@@ -1,305 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Provide additional functionality on top of bufmgr buffers:
- *   - 2d semantics and blit operations
- *   - refcounting of buffers for multiple images in a buffer.
- *   - refcounting of buffer mappings.
- *   - some logic for moving the buffers to the best memory pools for
- *     given operations.
- *
- * Most of this is to make it easier to implement the fixed-layout
- * mipmap tree required by intel hardware in the face of GL's
- * programming interface where each image can be specifed in random
- * order and it isn't clear what layout the tree should have until the
- * last moment.
- */
-
-#include "intel_context.h"
-#include "intel_regions.h"
-#include "intel_blit.h"
-#include "dri_bufmgr.h"
-#include "intel_bufmgr_ttm.h"
-#include "imports.h"
-
-#define FILE_DEBUG_FLAG DEBUG_REGION
-
-/* XXX: Thread safety?
- */
-GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   if (!region->map_refcount++) {
-      dri_bo_map(region->buffer, GL_TRUE);
-      region->map = region->buffer->virtual;
-   }
-
-   return region->map;
-}
-
-void intel_region_unmap(struct intel_context *intel, 
-			struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   if (!--region->map_refcount) {
-      dri_bo_unmap(region->buffer);
-      region->map = NULL;
-   }
-}
-
-struct intel_region *intel_region_alloc( struct intel_context *intel, 
-					 GLuint cpp,
-					 GLuint pitch, 
-					 GLuint height )
-{
-   struct intel_region *region = calloc(sizeof(*region), 1);
-
-   DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__,
-       cpp, pitch, height, cpp*pitch*height);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height; 	/* needed? */
-   region->refcount = 1;
-
-   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
-				 pitch * cpp * height, 64, DRM_BO_FLAG_MEM_TT);
-
-   return region;
-}
-
-void intel_region_reference( struct intel_region **dst,
-			     struct intel_region *src)
-{
-   src->refcount++;
-   assert(*dst == NULL);
-   *dst = src;
-}
-
-void intel_region_release( struct intel_context *intel,
-			   struct intel_region **region )
-{
-   if (!*region)
-      return;
-
-   DBG("%s %d\n", __FUNCTION__, (*region)->refcount-1);
-   
-   if (--(*region)->refcount == 0) {
-      assert((*region)->map_refcount == 0);
-      dri_bo_unreference((*region)->buffer);
-      free(*region);
-   }
-   *region = NULL;
-}
-
-void _mesa_copy_rect( GLubyte *dst,
-		      GLuint cpp,
-		      GLuint dst_pitch,
-		      GLuint dst_x, 
-		      GLuint dst_y,
-		      GLuint width,
-		      GLuint height,
-		      const GLubyte *src,
-		      GLuint src_pitch,
-		      GLuint src_x,
-		      GLuint src_y )
-{
-   GLuint i;
-
-   dst_pitch *= cpp;
-   src_pitch *= cpp;
-   dst += dst_x * cpp;
-   src += src_x * cpp;
-   dst += dst_y * dst_pitch;
-   src += src_y * dst_pitch;
-   width *= cpp;
-
-   if (width == dst_pitch && 
-       width == src_pitch)
-      do_memcpy(dst, src, height * width);
-   else {
-      for (i = 0; i < height; i++) {
-	 do_memcpy(dst, src, width);
-	 dst += dst_pitch;
-	 src += src_pitch;
-      }
-   }
-}
-
-
-/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
- *
- * - memcpy by span to current destination
- * - upload data as new buffer and blit
- *
- * Currently always memcpy.
- */
-GLboolean intel_region_data(struct intel_context *intel, 
-			    struct intel_region *dst,
-			    GLuint dst_offset,
-			    GLuint dstx, GLuint dsty,
-			    const void *src, GLuint src_pitch,
-			    GLuint srcx, GLuint srcy,
-			    GLuint width, GLuint height)
-{
-   DBG("%s\n", __FUNCTION__);
-
-   assert (dst_offset + dstx + width +
-	   (dsty + height - 1) * dst->pitch * dst->cpp <=
-	   dst->pitch * dst->cpp * dst->height);
-
-   _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
-                   dst->cpp,
-                   dst->pitch,
-                   dstx, dsty, width, height, src, src_pitch, srcx, srcy);
-   intel_region_unmap(intel, dst);
-
-   return GL_TRUE;
-}
-			  
-/* Copy rectangular sub-regions. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void intel_region_copy( struct intel_context *intel,
-			struct intel_region *dst,
-			GLuint dst_offset,
-			GLuint dstx, GLuint dsty,
-			struct intel_region *src,
-			GLuint src_offset,
-			GLuint srcx, GLuint srcy,
-			GLuint width, GLuint height )
-{
-   DBG("%s\n", __FUNCTION__);
-
-   assert(src->cpp == dst->cpp);
-
-   intelEmitCopyBlit(intel,
-		     dst->cpp,
-		     src->pitch, src->buffer, src_offset, src->tiled,
-		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
-		     srcx, srcy,
-		     dstx, dsty,
-		     width, height,
-		     GL_COPY );
-}
-
-/* Fill a rectangular sub-region.  Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void intel_region_fill( struct intel_context *intel,
-			struct intel_region *dst,
-			GLuint dst_offset,
-			GLuint dstx, GLuint dsty,
-			GLuint width, GLuint height,
-			GLuint color )
-{
-   DBG("%s\n", __FUNCTION__);
-   
-   intelEmitFillBlit(intel,
-		     dst->cpp,
-		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
-		     dstx, dsty,
-		     width, height,
-		     color );
-}
-
-static struct intel_region *
-intel_recreate_static(struct intel_context *intel,
-		      const char *name,
-		      struct intel_region *region,
-		      intelRegion *region_desc,
-		      GLuint mem_type)
-{
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-
-   if (region == NULL) {
-      region = calloc(sizeof(*region), 1);
-      region->refcount = 1;
-   }
-
-   region->cpp = intelScreen->cpp;
-   region->pitch = region_desc->pitch / intelScreen->cpp;
-   region->height = intelScreen->height;     /* needed? */
-   region->tiled = region_desc->tiled;
-
-   if (intel->ttm) {
-      assert(region_desc->bo_handle != -1);
-      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
-						       name,
-						       region_desc->bo_handle);
-   } else {
-      region->buffer = dri_bo_alloc_static(intel->bufmgr,
-					   name,
-					   region_desc->offset,
-					   region_desc->pitch *
-					   intelScreen->height,
-					   region_desc->map,
-					   DRM_BO_FLAG_MEM_TT);
-   }
-
-   assert(region->buffer != NULL);
-
-   return region;
-}
-
-/**
- * Create intel_region structs to describe the static front, back, and depth
- * buffers created by the xserver.
- *
- * Although FBO's mean we now no longer use these as render targets in
- * all circumstances, they won't go away until the back and depth
- * buffers become private, and the front buffer will remain even then.
- *
- * Note that these don't allocate video memory, just describe
- * allocations alread made by the X server.
- */
-void
-intel_recreate_static_regions(struct intel_context *intel)
-{
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-
-   intel->front_region =
-      intel_recreate_static(intel, "front",
-			    intel->front_region,
-			    &intelScreen->front,
-			    DRM_BO_FLAG_MEM_TT);
-
-   intel->back_region =
-      intel_recreate_static(intel, "back",
-			    intel->back_region,
-			    &intelScreen->back,
-			    DRM_BO_FLAG_MEM_TT);
-
-   /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
-    * private buffers.
-    */
-   intel->depth_region =
-      intel_recreate_static(intel, "depth",
-			    intel->depth_region,
-			    &intelScreen->depth,
-			    DRM_BO_FLAG_MEM_TT);
-}
+../intel/intel_regions.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_regions.h b/src/mesa/drivers/dri/i965/intel_regions.h
deleted file mode 100644
index 3e130203c3..0000000000
--- a/src/mesa/drivers/dri/i965/intel_regions.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_REGIONS_H
-#define INTEL_REGIONS_H
-
-#include "mtypes.h"
-#include "dri_bufmgr.h"		/* for DBG! */
-#include "intel_screen.h"
-struct intel_context;
-
-/* A layer on top of the bufmgr buffers that adds a few useful things:
- *
- * - Refcounting for local buffer references.
- * - Refcounting for buffer maps
- * - Buffer dimensions - pitch and height.
- * - Blitter commands for copying 2D regions between buffers.
- */
-struct intel_region {
-   dri_bo *buffer;
-   GLuint refcount;
-   GLuint cpp;
-   GLuint pitch;
-   GLuint height;
-   GLboolean tiled;
-   GLubyte *map;
-   GLuint map_refcount;
-};
-
-/* Allocate a refcounted region.  Pointers to regions should only be
- * copied by calling intel_reference_region().
- *
- * No support for dynamically allocating tiled regions at this point.
- */
-struct intel_region *intel_region_alloc( struct intel_context *intel,
-					 GLuint cpp,
-					 GLuint pitch, 
-					 GLuint height );
-
-void intel_region_reference( struct intel_region **dst, 
-			     struct intel_region *src );
-
-void intel_region_release(struct intel_context *intel,
-			  struct intel_region **ib );
-
-void intel_recreate_static_regions(struct intel_context *intel);
-
-/* Static regions may be tiled.  The assumption is that the X server
- * has set up fence registers to define tiled zones in agp and these
- * buffers are within those zones.  Tiling regions without fence
- * registers is more work.
- */
-struct intel_region *
-intel_region_create_static(intelScreenPrivate *intelScreen,
-			   char *name,
-			   GLuint mem_type,
-			   unsigned int bo_handle,
-			   GLuint offset,
-			   void *virtual,
-			   GLuint cpp,
-			   GLuint pitch, GLuint height, GLboolean tiled);
-void
-intel_region_update_static(intelScreenPrivate *intelScreen,
-			   struct intel_region *region,
-			   GLuint mem_type,
-			   unsigned int bo_handle,
-			   GLuint offset,
-			   void *virtual,
-			   GLuint cpp, GLuint pitch, GLuint height,
-			   GLboolean tiled);
-
-/* Map/unmap regions.  This is refcounted also: 
- */
-GLubyte *intel_region_map(struct intel_context *intel, 
-		       struct intel_region *ib);
-
-void intel_region_unmap(struct intel_context *intel,
-			struct intel_region *ib);
-
-
-/* Upload data to a rectangular sub-region
- */
-GLboolean intel_region_data(struct intel_context *intel, 
-			    struct intel_region *dest,
-			    GLuint dest_offset,
-			    GLuint destx, GLuint desty,
-			    const void *src, GLuint src_stride,
-			    GLuint srcx, GLuint srcy,
-			    GLuint width, GLuint height);
-			  
-/* Copy rectangular sub-regions
- */
-void intel_region_copy( struct intel_context *intel,
-			struct intel_region *dest,
-			GLuint dest_offset,
-			GLuint destx, GLuint desty,
-			struct intel_region *src,
-			GLuint src_offset,
-			GLuint srcx, GLuint srcy,
-			GLuint width, GLuint height );
-
-/* Fill a rectangular sub-region
- */
-void intel_region_fill( struct intel_context *intel,
-			struct intel_region *dest,
-			GLuint dest_offset,
-			GLuint destx, GLuint desty,
-			GLuint width, GLuint height,
-			GLuint color );
-
-
-/***********************************************************************
- * Misc utilities: move to somewhere generic
- */
-void _mesa_copy_rect( GLubyte *dst,
-		      GLuint cpp,
-		      GLuint dst_pitch,
-		      GLuint dst_x, 
-		      GLuint dst_y,
-		      GLuint width,
-		      GLuint height,
-		      const GLubyte *src,
-		      GLuint src_pitch,
-		      GLuint src_x,
-		      GLuint src_y );
-
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 61d2b9a7b2..f2db48272b 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1,693 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "context.h"
-#include "framebuffer.h"
-#include "matrix.h"
-#include "renderbuffer.h"
-#include "simple_list.h"
-#include "utils.h"
-#include "vblank.h"
-#include "xmlpool.h"
-
-
-#include "intel_screen.h"
-
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_span.h"
-#include "intel_ioctl.h"
-#include "intel_regions.h"
-#include "intel_bufmgr_ttm.h"
-
-#include "i915_drm.h"
-#include "i830_dri.h"
-
-PUBLIC const char __driConfigOptions[] =
-DRI_CONF_BEGIN
-    DRI_CONF_SECTION_PERFORMANCE
-       DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) 
-       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-    DRI_CONF_SECTION_END
-    DRI_CONF_SECTION_QUALITY
-       DRI_CONF_FORCE_S3TC_ENABLE(false)
-       DRI_CONF_ALLOW_LARGE_TEXTURES(1)
-      DRI_CONF_SECTION_END
-DRI_CONF_END;
-const GLuint __driNConfigOptions = 4;
-
-#ifdef USE_NEW_INTERFACE
-static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
-#endif /*USE_NEW_INTERFACE*/
-
-/**
- * Map all the memory regions described by the screen.
- * \return GL_TRUE if success, GL_FALSE if error.
- */
-GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   if (intelScreen->front.handle) {
-      if (drmMap(sPriv->fd,
-                 intelScreen->front.handle,
-                 intelScreen->front.size,
-                 (drmAddress *)&intelScreen->front.map) != 0) {
-         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
-         return GL_FALSE;
-      }
-   } else {
-      /* Use the old static allocation method if the server isn't setting up
-       * a movable handle for us.  Add in the front buffer offset from
-       * framebuffer start, as our span routines (unlike other drivers) expect
-       * the renderbuffer address to point to the beginning of the
-       * renderbuffer.
-       */
-      intelScreen->front.map = (char *)sPriv->pFB;
-      if (intelScreen->front.map == NULL) {
-	 fprintf(stderr, "Failed to find framebuffer mapping\n");
-	 return GL_FALSE;
-      }
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->back.handle,
-              intelScreen->back.size,
-              (drmAddress *)&intelScreen->back.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->depth.handle,
-              intelScreen->depth.size,
-              (drmAddress *)&intelScreen->depth.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->tex.handle,
-              intelScreen->tex.size,
-              (drmAddress *)&intelScreen->tex.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (0)
-      printf("Mappings:  front: %p  back: %p  depth: %p  tex: %p\n",
-          intelScreen->front.map,
-          intelScreen->back.map,
-          intelScreen->depth.map,
-          intelScreen->tex.map);
-   return GL_TRUE;
-}
-
-void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
-{
-#define REALLY_UNMAP 1
-   /* If front.handle is present, we're doing the dynamic front buffer mapping,
-    * but if we've fallen back to static allocation then we shouldn't try to
-    * unmap here.
-    */
-   if (intelScreen->front.handle) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
-         printf("drmUnmap front failed!\n");
-#endif
-      intelScreen->front.map = NULL;
-   }
-   if (intelScreen->back.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
-         printf("drmUnmap back failed!\n");
-#endif
-      intelScreen->back.map = NULL;
-   }
-   if (intelScreen->depth.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
-      intelScreen->depth.map = NULL;
-#endif
-   }
-   if (intelScreen->tex.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
-      intelScreen->tex.map = NULL;
-#endif
-   }
-}
-
-
-static void
-intelPrintDRIInfo(intelScreenPrivate *intelScreen,
-                  __DRIscreenPrivate *sPriv,
-                  I830DRIPtr gDRIPriv)
-{
-   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->front.size, intelScreen->front.offset,
-           intelScreen->front.pitch);
-   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->back.size, intelScreen->back.offset,
-           intelScreen->back.pitch);
-   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->depth.size, intelScreen->depth.offset,
-           intelScreen->depth.pitch);
-   fprintf(stderr, "*** Rotated size: 0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->rotated.size, intelScreen->rotated.offset,
-           intelScreen->rotated.pitch);
-   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
-           intelScreen->tex.size, intelScreen->tex.offset);
-   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
-}
-
-
-static void
-intelPrintSAREA(volatile drmI830Sarea *sarea)
-{
-   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width, sarea->height);
-   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
-   fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle);
-   fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle);
-   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->tex_offset, sarea->tex_size,
-           (unsigned) sarea->tex_handle);
-   fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation);
-   fprintf(stderr,
-           "SAREA: rotated offset: 0x%08x  size: 0x%x\n",
-           sarea->rotated_offset, sarea->rotated_size);
-   fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
-}
-
-
-/**
- * A number of the screen parameters are obtained/computed from
- * information in the SAREA.  This function updates those parameters.
- */
-void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           volatile drmI830Sarea *sarea)
-{
-   intelScreen->width = sarea->width;
-   intelScreen->height = sarea->height;
-
-   intelScreen->front.offset = sarea->front_offset;
-   intelScreen->front.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->front.handle = sarea->front_handle;
-   intelScreen->front.size = sarea->front_size;
-   intelScreen->front.tiled = sarea->front_tiled;
-
-   intelScreen->back.offset = sarea->back_offset;
-   intelScreen->back.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->back.handle = sarea->back_handle;
-   intelScreen->back.size = sarea->back_size;
-   intelScreen->back.tiled = sarea->back_tiled;
-
-   intelScreen->depth.offset = sarea->depth_offset;
-   intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->depth.handle = sarea->depth_handle;
-   intelScreen->depth.size = sarea->depth_size;
-   intelScreen->depth.tiled = sarea->depth_tiled;
-
-   if (intelScreen->driScrnPriv->ddx_version.minor >= 9) {
-      intelScreen->front.bo_handle = sarea->front_bo_handle;
-      intelScreen->back.bo_handle = sarea->back_bo_handle;
-      intelScreen->depth.bo_handle = sarea->depth_bo_handle;
-   } else {
-      intelScreen->front.bo_handle = -1;
-      intelScreen->back.bo_handle = -1;
-      intelScreen->depth.bo_handle = -1;
-   }
-
-   intelScreen->tex.offset = sarea->tex_offset;
-   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
-   intelScreen->tex.handle = sarea->tex_handle;
-   intelScreen->tex.size = sarea->tex_size;
-
-   intelScreen->rotated.offset = sarea->rotated_offset;
-   intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp;
-   intelScreen->rotated.size = sarea->rotated_size;
-   intelScreen->rotated.tiled = sarea->rotated_tiled;
-   intelScreen->current_rotation = sarea->rotation;
-#if 0
-   matrix23Rotate(&intelScreen->rotMatrix,
-                  sarea->width, sarea->height, sarea->rotation);
-#endif
-   intelScreen->rotatedWidth = sarea->virtualX;
-   intelScreen->rotatedHeight = sarea->virtualY;
-
-   if (0)
-      intelPrintSAREA(sarea);
-}
-
-static const __DRIextension *intelExtensions[] = {
-    &driReadDrawableExtension,
-    &driCopySubBufferExtension.base,
-    &driSwapControlExtension.base,
-    &driFrameTrackingExtension.base,
-    &driMediaStreamCounterExtension.base,
-    NULL
-};
-
-static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen;
-   I830DRIPtr         gDRIPriv = (I830DRIPtr)sPriv->pDevPriv;
-   volatile drmI830Sarea *sarea;
-
-   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
-      fprintf(stderr,"\nERROR!  sizeof(I830DRIRec) (%ld) does not match passed size from device driver (%d)\n", (unsigned long)sizeof(I830DRIRec), sPriv->devPrivSize);
-      return GL_FALSE;
-   }
-
-   /* Allocate the private area */
-   intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate));
-   if (!intelScreen) {
-      fprintf(stderr,"\nERROR!  Allocating private area failed\n");
-      return GL_FALSE;
-   }
-   /* parse information in __driConfigOptions */
-   driParseOptionInfo (&intelScreen->optionCache,
-		       __driConfigOptions, __driNConfigOptions);
-
-   intelScreen->driScrnPriv = sPriv;
-   sPriv->private = (void *)intelScreen;
-   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
-   sarea = (volatile drmI830Sarea *)
-         (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
-
-   intelScreen->deviceID = gDRIPriv->deviceID;
-   intelScreen->mem = gDRIPriv->mem;
-   intelScreen->cpp = gDRIPriv->cpp;
-
-   switch (gDRIPriv->bitsPerPixel) {
-   case 15: intelScreen->fbFormat = DV_PF_555; break;
-   case 16: intelScreen->fbFormat = DV_PF_565; break;
-   case 32: intelScreen->fbFormat = DV_PF_8888; break;
-   }
-			 
-   intelUpdateScreenFromSAREA(intelScreen, sarea);
-
-   if (0)
-      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
-
-   if (!intelMapScreenRegions(sPriv)) {
-      fprintf(stderr,"\nERROR!  mapping regions\n");
-      _mesa_free(intelScreen);
-      sPriv->private = NULL;
-      return GL_FALSE;
-   }
-
-   intelScreen->drmMinor = sPriv->drm_version.minor;
-
-   /* Determine if IRQs are active? */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_IRQ_ACTIVE;
-      gp.value = &intelScreen->irq_active;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: %d\n", ret);
-	 return GL_FALSE;
-      }
-   }
-
-   /* Determine if batchbuffers are allowed */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_ALLOW_BATCHBUFFER;
-      gp.value = &intelScreen->allow_batchbuffer;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret);
-	 return GL_FALSE;
-      }
-   }
-
-   sPriv->extensions = intelExtensions;
-
-   return GL_TRUE;
-}
-
-
-static void intelDestroyScreen(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   intelUnmapScreenRegions(intelScreen);
-   FREE(intelScreen);
-   sPriv->private = NULL;
-}
-
-static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-				    __DRIdrawablePrivate *driDrawPriv,
-				    const __GLcontextModes *mesaVis,
-				    GLboolean isPixmap )
-{
-   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
-
-   if (isPixmap) {
-      return GL_FALSE; /* not implemented */
-   } else {
-      GLboolean swStencil = (mesaVis->stencilBits > 0 && 
-			     mesaVis->depthBits != 24);
-
-      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-
-      {
-         driRenderbuffer *frontRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->front.map,
-                                 screen->cpp,
-                                 screen->front.offset, screen->front.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(frontRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
-      }
-
-      if (mesaVis->doubleBufferMode) {
-         driRenderbuffer *backRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->back.map,
-                                 screen->cpp,
-                                 screen->back.offset, screen->back.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(backRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
-      }
-
-      if (mesaVis->depthBits == 16) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-      else if (mesaVis->depthBits == 24) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-
-      if (mesaVis->stencilBits > 0 && !swStencil) {
-         driRenderbuffer *stencilRb
-            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(stencilRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
-      }
-
-      _mesa_add_soft_renderbuffers(fb,
-                                   GL_FALSE, /* color */
-                                   GL_FALSE, /* depth */
-                                   swStencil,
-                                   mesaVis->accumRedBits > 0,
-                                   GL_FALSE, /* alpha */
-                                   GL_FALSE /* aux */);
-      driDrawPriv->driverPrivate = (void *) fb;
-
-      return (driDrawPriv->driverPrivate != NULL);
-   }
-}
-
-static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
-{
-   _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
-}
-
-
-/**
- * Get information about previous buffer swaps.
- */
-static int
-intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
-{
-   struct intel_context *intel;
-
-   if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
-	|| (dPriv->driContextPriv->driverPrivate == NULL)
-	|| (sInfo == NULL) ) {
-      return -1;
-   }
-
-   intel = dPriv->driContextPriv->driverPrivate;
-   sInfo->swap_count = intel->swap_count;
-   sInfo->swap_ust = intel->swap_ust;
-   sInfo->swap_missed_count = intel->swap_missed_count;
-
-   sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
-       ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust )
-       : 0.0;
-
-   return 0;
-}
-
-
-/* There are probably better ways to do this, such as an
- * init-designated function to register chipids and createcontext
- * functions.
- */
-extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
-				    __DRIcontextPrivate *driContextPriv,
-				    void *sharedContextPrivate);
-
-extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate);
-
-extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate);
-
-
-
-
-static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate)
-{
-#if 0
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   switch (intelScreen->deviceID) {
-   case PCI_CHIP_845_G:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
-      return i830CreateContext( mesaVis, driContextPriv, 
-				sharedContextPrivate );
-
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-      return i915CreateContext( mesaVis, driContextPriv, 
-			       sharedContextPrivate );
- 
-   default:
-      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
-      return GL_FALSE;
-   }
-#else
-   return brwCreateContext( mesaVis, driContextPriv, 
-			    sharedContextPrivate );
-#endif
-}
-
-
-static const struct __DriverAPIRec intelAPI = {
-   .DestroyScreen   = intelDestroyScreen,
-   .CreateContext   = intelCreateContext,
-   .DestroyContext  = intelDestroyContext,
-   .CreateBuffer    = intelCreateBuffer,
-   .DestroyBuffer   = intelDestroyBuffer,
-   .SwapBuffers     = intelSwapBuffers,
-   .MakeCurrent     = intelMakeCurrent,
-   .UnbindContext   = intelUnbindContext,
-   .GetSwapInfo     = intelGetSwapInfo,
-   .GetMSC          = driGetMSC32,
-   .GetDrawableMSC  = driDrawableGetMSC32,
-   .WaitForMSC      = driWaitForMSC32,
-   .WaitForSBC      = NULL,
-   .SwapBuffersMSC  = NULL,
-   .CopySubBuffer   = intelCopySubBuffer
-};
-
-
-static __GLcontextModes *
-intelFillInModes( unsigned pixel_bits, unsigned depth_bits,
-		 unsigned stencil_bits, GLboolean have_back_buffer )
-{
-   __GLcontextModes * modes;
-   __GLcontextModes * m;
-   unsigned num_modes;
-   unsigned depth_buffer_factor;
-   unsigned back_buffer_factor;
-   GLenum fb_format;
-   GLenum fb_type;
-
-   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-    * support pageflipping at all.
-    */
-   static const GLenum back_buffer_modes[] = {
-      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
-   };
-
-   u_int8_t depth_bits_array[3];
-   u_int8_t stencil_bits_array[3];
-
-
-   depth_bits_array[0] = 0;
-   depth_bits_array[1] = depth_bits;
-   depth_bits_array[2] = depth_bits;
-
-   /* Just like with the accumulation buffer, always provide some modes
-    * with a stencil buffer.  It will be a sw fallback, but some apps won't
-    * care about that.
-    */
-   stencil_bits_array[0] = 0;
-   stencil_bits_array[1] = 0;
-   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
-
-   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
-   back_buffer_factor  = (have_back_buffer) ? 3 : 1;
-
-   num_modes = depth_buffer_factor * back_buffer_factor * 4;
-
-    if ( pixel_bits == 16 ) {
-        fb_format = GL_RGB;
-        fb_type = GL_UNSIGNED_SHORT_5_6_5;
-    }
-    else {
-        fb_format = GL_BGRA;
-        fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
-    }
-
-   modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
-   m = modes;
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_TRUE_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_DIRECT_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-
-   /* Mark the visual as slow if there are "fake" stencil bits.
-    */
-   for ( m = modes ; m != NULL ; m = m->next ) {
-      if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
-	 m->visualRating = GLX_SLOW_CONFIG;
-      }
-   }
-
-   return modes;
-}
-
-
-/**
- * This is the driver specific part of the createNewScreen entry point.
- * 
- * \todo maybe fold this into intelInitDriver
- *
- * \return the __GLcontextModes supported by this driver
- */
-__GLcontextModes *__driDriverInitScreen(__DRIscreenPrivate *psp)
-{
-   static const __DRIversion ddx_expected = { 1, 6, 0 };
-   static const __DRIversion dri_expected = { 4, 0, 0 };
-   static const __DRIversion drm_expected = { 1, 3, 0 };
-   I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
-
-   psp->DriverAPI = intelAPI;
-   if ( ! driCheckDriDdxDrmVersions2( "i915",
-				      &psp->dri_version, &dri_expected,
-				      &psp->ddx_version, &ddx_expected,
-				      &psp->drm_version, &drm_expected ) ) {
-       return NULL;
-   }
-
-   /* Calling driInitExtensions here, with a NULL context pointer,
-    * does not actually enable the extensions.  It just makes sure
-    * that all the dispatch offsets for all the extensions that
-    * *might* be enables are known.  This is needed because the
-    * dispatch offsets need to be known when _mesa_context_create is
-    * called, but we can't enable the extensions until we have a
-    * context pointer.
-    *
-    * Hello chicken.  Hello egg.  How are you two today?
-    */
-   intelInitExtensions(NULL, GL_FALSE);
-
-   if (!intelInitDriver(psp))
-       return NULL;
-
-   return intelFillInModes( dri_priv->cpp * 8,
-			    (dri_priv->cpp == 2) ? 16 : 24,
-			    (dri_priv->cpp == 2) ? 0  : 8,
-			    GL_TRUE );
-}
+../intel/intel_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
deleted file mode 100644
index fb8f6a366c..0000000000
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef _INTEL_INIT_H_
-#define _INTEL_INIT_H_
-
-#include <sys/time.h>
-#include "dri_util.h"
-#include "dri_bufmgr.h"
-#include "xmlconfig.h"
-#include "i830_common.h"
-
-/* XXX: change name or eliminate to avoid conflict with "struct
- * intel_region"!!!
- */
-typedef struct {
-   drm_handle_t handle;
-   drmSize size;        /* region size in bytes */
-   char *map;           /* memory map */
-   int offset;          /* from start of video mem, in bytes */
-   int pitch;           /* row stride, in pixels */
-   unsigned int bo_handle;
-   unsigned int tiled; 
-} intelRegion;
-
-typedef struct 
-{
-   intelRegion front;
-   intelRegion back;
-   intelRegion rotated;
-   intelRegion depth;
-   intelRegion tex;
-
-   int deviceID;
-   int width;
-   int height;
-   int mem;         /* unused */
-
-   int cpp;         /* for front and back buffers */
-   int fbFormat;
-
-   int logTextureGranularity;
-   
-   __DRIscreenPrivate *driScrnPriv;
-   unsigned int sarea_priv_offset;
-
-   int drmMinor;
-
-   int irq_active;
-   int allow_batchbuffer;
-
-/*    struct matrix23 rotMatrix; */
-
-   int current_rotation;  /* 0, 90, 180 or 270 */
-   int rotatedWidth, rotatedHeight;
-
-   /**
-    * Configuration cache with default values for all contexts 
-    */
-   driOptionCache optionCache;
-} intelScreenPrivate;
-
-
-extern GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv);
-
-extern void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen);
-
-extern void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           volatile drmI830Sarea *sarea);
-
-extern void
-intelDestroyContext(__DRIcontextPrivate *driContextPriv);
-
-extern GLboolean
-intelUnbindContext(__DRIcontextPrivate *driContextPriv);
-
-extern GLboolean
-intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
-                 __DRIdrawablePrivate *driDrawPriv,
-                 __DRIdrawablePrivate *driReadPriv);
-
-extern void
-intelSwapBuffers(__DRIdrawablePrivate *dPriv);
-
-extern void 
-intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
-		    int x, int y, int w, int h );
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_span.c b/src/mesa/drivers/dri/i965/intel_span.c
index 60fbeccdc5..05e5e8e583 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_span.c
+++ b/src/mesa/drivers/dri/i965/intel_span.c
@@ -1,283 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "colormac.h"
-
-#include "intel_screen.h"
-#include "intel_regions.h"
-#include "intel_span.h"
-#include "intel_ioctl.h"
-#include "intel_tex.h"
-#include "intel_batchbuffer.h"
-#include "swrast/swrast.h"
-
-#undef DBG
-#define DBG 0
-
-#define LOCAL_VARS						\
-   struct intel_context *intel = intel_context(ctx);                    \
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLushort p;							\
-   (void) buf; (void) p
-
-#define LOCAL_DEPTH_VARS					\
-   struct intel_context *intel = intel_context(ctx);                    \
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch
-
-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
-
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR565(color[0],color[1],color[2])
-
-#define Y_FLIP(_y) (height - _y - 1)
-
-#define HW_LOCK()
-
-#define HW_UNLOCK()
-
-/* 16 bit, 565 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )				\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = ( (((int)r & 0xf8) << 8) |	\
-		                             (((int)g & 0xfc) << 3) |	\
-		                             (((int)b & 0xf8) >> 3))
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch) = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
-   rgba[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
-   rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_565
-#include "spantmp.h"
-
-/* 15 bit, 555 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )			\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = (((r & 0xf8) << 7) |	\
-		                            ((g & 0xf8) << 3) |	\
-                         		    ((b & 0xf8) >> 3))
-
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (p >> 7) & 0xf8;					\
-   rgba[1] = (p >> 3) & 0xf8;					\
-   rgba[2] = (p << 3) & 0xf8;					\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_555
-#include "spantmp.h"
-
-/* 16 bit depthbuffer functions.
- */
-#define WRITE_DEPTH( _x, _y, d ) \
-   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
-
-#define READ_DEPTH( d, _x, _y )	\
-   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	 
-
-
-#define TAG(x) intel##x##_z16
-#include "depthtmp.h"
-
-
-#undef LOCAL_VARS
-#define LOCAL_VARS						\
-   struct intel_context *intel = intel_context(ctx);			\
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *)drb->Base.Data +				\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLuint p;							\
-   (void) buf; (void) p
-
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3])
-
-/* 32 bit, 8888 argb color spanline and pixel functions
- */
-#define WRITE_RGBA(_x, _y, r, g, b, a)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) |	\
-					  (g << 8)  |	\
-					  (b << 0)  |	\
-					  (a << 24) )
-
-#define WRITE_PIXEL(_x, _y, p)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = p
-
-
-#define READ_RGBA(rgba, _x, _y)					\
-    do {							\
-	GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch);		\
-	rgba[0] = (p >> 16) & 0xff;				\
-	rgba[1] = (p >> 8)  & 0xff;				\
-	rgba[2] = (p >> 0)  & 0xff;				\
-	rgba[3] = (p >> 24) & 0xff;				\
-    } while (0)
-
-#define TAG(x) intel##x##_8888
-#include "spantmp.h"
-
-
-/* 24/8 bit interleaved depth/stencil functions
- */
-#define WRITE_DEPTH( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xff000000;					\
-   tmp |= (d) & 0xffffff;				\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_DEPTH( d, _x, _y )		\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff;
-
-
-#define TAG(x) intel##x##_z24_s8
-#include "depthtmp.h"
-
-#define WRITE_STENCIL( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xffffff;					\
-   tmp |= ((d)<<24);					\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_STENCIL( d, _x, _y )			\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24;
-
-#define TAG(x) intel##x##_z24_s8
-#include "stenciltmp.h"
-
-
-/* Move locking out to get reasonable span performance.
- */
-void intelSpanRenderStart( GLcontext *ctx )
-{
-   struct intel_context *intel = intel_context(ctx);
-
-   if (intel->need_flush) {
-      LOCK_HARDWARE(intel);
-      intel->vtbl.emit_flush(intel, 0);
-      intel_batchbuffer_flush(intel->batch);
-      intel->need_flush = 0;
-      UNLOCK_HARDWARE(intel);
-      intelFinish(&intel->ctx);
-   }
-
-
-   LOCK_HARDWARE(intel);
-
-   /* Just map the framebuffer and all textures.  Bufmgr code will
-    * take care of waiting on the necessary fences:
-    */
-   intel_region_map(intel, intel->front_region);
-   intel_region_map(intel, intel->back_region);
-   intel_region_map(intel, intel->depth_region);
-}
-
-void intelSpanRenderFinish( GLcontext *ctx )
-{
-   struct intel_context *intel = intel_context( ctx );
-
-   _swrast_flush( ctx );
-
-   /* Now unmap the framebuffer:
-    */
-   intel_region_unmap(intel, intel->front_region);
-   intel_region_unmap(intel, intel->back_region);
-   intel_region_unmap(intel, intel->depth_region);
-
-   UNLOCK_HARDWARE( intel );
-}
-
-void intelInitSpanFuncs( GLcontext *ctx )
-{
-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-   swdd->SpanRenderStart = intelSpanRenderStart;
-   swdd->SpanRenderFinish = intelSpanRenderFinish; 
-}
-
-
-/**
- * Plug in the Get/Put routines for the given driRenderbuffer.
- */
-void
-intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
-{
-   if (drb->Base.InternalFormat == GL_RGBA) {
-      if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) {
-         intelInitPointers_555(&drb->Base);
-      }
-      else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
-         intelInitPointers_565(&drb->Base);
-      }
-      else {
-         assert(vis->redBits == 8);
-         assert(vis->greenBits == 8);
-         assert(vis->blueBits == 8);
-         intelInitPointers_8888(&drb->Base);
-      }
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-      intelInitDepthPointers_z24_s8(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-      intelInitStencilPointers_z24_s8(&drb->Base);
-   }
-}
+../intel/intel_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_span.h b/src/mesa/drivers/dri/i965/intel_span.h
deleted file mode 100644
index 2d4f8589d0..0000000000
--- a/src/mesa/drivers/dri/i965/intel_span.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef _INTEL_SPAN_H
-#define _INTEL_SPAN_H
-
-#include "drirenderbuffer.h"
-
-extern void intelInitSpanFuncs( GLcontext *ctx );
-
-extern void intelSpanRenderFinish( GLcontext *ctx );
-extern void intelSpanRenderStart( GLcontext *ctx );
-
-extern void
-intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c
index 2f5467a4e4..0fba5a771c 100644
--- a/src/mesa/drivers/dri/i965/intel_state.c
+++ b/src/mesa/drivers/dri/i965/intel_state.c
@@ -195,15 +195,16 @@ int intel_translate_logic_op( GLenum opcode )
 static void intelClearColor(GLcontext *ctx, const GLfloat color[4])
 {
    struct intel_context *intel = intel_context(ctx);
-   intelScreenPrivate *screen = intel->intelScreen;
 
    UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color);
 
-   intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat,
-				       intel->clear_chan[0], 
-				       intel->clear_chan[1], 
-				       intel->clear_chan[2], 
-				       intel->clear_chan[3]);
+   intel->ClearColor8888 = INTEL_PACKCOLOR8888(intel->clear_chan[0],
+					       intel->clear_chan[1],
+					       intel->clear_chan[2],
+					       intel->clear_chan[3]);
+   intel->ClearColor565 = INTEL_PACKCOLOR565(intel->clear_chan[0],
+					     intel->clear_chan[1],
+					     intel->clear_chan[2]);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index e98e9bbfc4..d77ce749a3 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -1,316 +1 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "mtypes.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "teximage.h"
-#include "texobj.h"
-#include "swrast/swrast.h"
-
-
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_mipmap_tree.h"
-
-
-static GLuint target_to_face( GLenum target )
-{
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      return ((GLuint) target - 
-	      (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X);
-   default:
-      return 0;
-   }
-}
-
-static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_teximage1d( ctx, target, level, internalFormat,
-			   width, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1;
-}
-
-static void intelTexSubImage1D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset,
-				GLsizei width,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, 
-			     format, type, pixels, packing, texObj,
-			     texImage);
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1;
-}
-
-
-/* Handles 2D, CUBE, RECT:
- */
-static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint height, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_teximage2d( ctx, target, level, internalFormat,
-			   width, height, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-static void intelTexSubImage2D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset, GLint yoffset,
-			       GLsizei width, GLsizei height,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, 
-			     height, format, type, pixels, packing, texObj,
-			     texImage);
-
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLsizei imageSize, const GLvoid *data,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
-				     height, border, imageSize, data, texObj, texImage);
-   
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-
-static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format,
-                                 GLsizei imageSize, const GLvoid *data,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-					height, format, imageSize, data, texObj, texImage);
-   
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-
-static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                            GLint internalFormat,
-                            GLint width, GLint height, GLint depth,
-                            GLint border,
-                            GLenum format, GLenum type, const GLvoid *pixels,
-                            const struct gl_pixelstore_attrib *packing,
-                            struct gl_texture_object *texObj,
-                            struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-			  width, height, depth, border,
-			  format, type, pixels,
-			  &ctx->Unpack, texObj, texImage);
-   
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1 << 0;
-}
-
-
-static void
-intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                   GLint xoffset, GLint yoffset, GLint zoffset,
-                   GLsizei width, GLsizei height, GLsizei depth,
-                   GLenum format, GLenum type,
-                   const GLvoid *pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth,
-                             format, type, pixels, packing, texObj, texImage);
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1 << 0;
-}
-
-
-
-
-static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, 
-							GLuint name, 
-							GLenum target )
-{
-   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
-
-   _mesa_initialize_texture_object(&obj->base, name, target);
-
-   return &obj->base;
-}
-
-static GLboolean intelIsTextureResident(GLcontext *ctx,
-                                      struct gl_texture_object *texObj)
-{
-#if 0
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   
-   return 
-      intelObj->mt && 
-      intelObj->mt->region && 
-      intel_is_region_resident(intel, intelObj->mt->region);
-#endif
-   return 1;
-}
-
-
-
-static void intelTexParameter( GLcontext *ctx, 
-			       GLenum target,
-			       struct gl_texture_object *texObj,
-			       GLenum pname, 
-			       const GLfloat *params )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
- 
-   switch (pname) {
-      /* Anything which can affect the calculation of firstLevel and
-       * lastLevel, as changes to these may invalidate the miptree.
-       */
-   case GL_TEXTURE_MIN_FILTER:
-   case GL_TEXTURE_MAG_FILTER:
-   case GL_TEXTURE_BASE_LEVEL:
-   case GL_TEXTURE_MAX_LEVEL:
-   case GL_TEXTURE_MIN_LOD:
-   case GL_TEXTURE_MAX_LOD:
-      intelObj->dirty |= 1;
-      break;
-
-   default:
-      break;
-   }
-}
-
-
-static void
-intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   if (intelObj->mt)
-      intel_miptree_destroy(intel, intelObj->mt);
-
-   _mesa_delete_texture_object( ctx, texObj );
-}
-
-void intelInitTextureFuncs( struct dd_function_table *functions )
-{
-   functions->NewTextureObject          = intelNewTextureObject;
-   functions->ChooseTextureFormat = intelChooseTextureFormat;
-   functions->TexImage1D                = intelTexImage1D;
-   functions->TexImage2D                = intelTexImage2D;
-   functions->TexImage3D                = intelTexImage3D;
-   functions->TexSubImage1D             = intelTexSubImage1D;
-   functions->TexSubImage2D             = intelTexSubImage2D;
-   functions->TexSubImage3D             = intelTexSubImage3D;
-   functions->CopyTexImage1D            = _swrast_copy_teximage1d;
-   functions->CopyTexImage2D            = _swrast_copy_teximage2d;
-   functions->CopyTexSubImage1D         = _swrast_copy_texsubimage1d;
-   functions->CopyTexSubImage2D         = _swrast_copy_texsubimage2d;
-   functions->CopyTexSubImage3D         = _swrast_copy_texsubimage3d;
-   functions->DeleteTexture             = intel_delete_texture_object;
-   functions->UpdateTexturePalette      = NULL;
-   functions->IsTextureResident = intelIsTextureResident;
-   functions->TestProxyTexImage         = _mesa_test_proxy_teximage;
-   functions->CompressedTexImage2D      = intelCompressedTexImage2D;
-   functions->CompressedTexSubImage2D   = intelCompressedTexSubImage2D;
-   functions->TexParameter              = intelTexParameter;
-}
-
-
-
-
-
+../intel/intel_tex.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
deleted file mode 100644
index d38325d538..0000000000
--- a/src/mesa/drivers/dri/i965/intel_tex.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTELTEX_INC
-#define INTELTEX_INC
-
-#include "mtypes.h"
-#include "intel_context.h"
-
-
-void intelInitTextureFuncs( struct dd_function_table *functions );
-
-const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx,
-                                                         GLint internalFormat,
-                                                         GLenum format,
-                                                         GLenum type);
-
-GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
-				   struct gl_texture_object *tObj );
-
-int intel_compressed_num_bytes(GLuint mesaFormat);
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
new file mode 120000
index 0000000000..87196c5d1e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -0,0 +1 @@
+../intel/intel_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
new file mode 120000
index 0000000000..567abe4974
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -0,0 +1 @@
+../intel/intel_tex_image.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
new file mode 120000
index 0000000000..b3a8a3d7ca
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -0,0 +1 @@
+../intel/intel_tex_subimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 4c8afd99da..41a75674c2 100644..120000
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -1,265 +1 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "mtypes.h"
-#include "macros.h"
-
-#include "intel_context.h"
-#include "intel_mipmap_tree.h"
-#include "intel_tex.h"
-#include "dri_bufmgr.h"
-
-/**
- * Compute which mipmap levels that really need to be sent to the hardware.
- * This depends on the base image size, GL_TEXTURE_MIN_LOD,
- * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
- */
-static void intel_calculate_first_last_level( struct intel_texture_object *intelObj )
-{
-   struct gl_texture_object *tObj = &intelObj->base;
-   const struct gl_texture_image * const baseImage =
-       tObj->Image[0][tObj->BaseLevel];
-
-   /* These must be signed values.  MinLod and MaxLod can be negative numbers,
-    * and having firstLevel and lastLevel as signed prevents the need for
-    * extra sign checks.
-    */
-   int   firstLevel;
-   int   lastLevel;
-
-   /* Yes, this looks overly complicated, but it's all needed.
-    */
-   switch (tObj->Target) {
-   case GL_TEXTURE_1D:
-   case GL_TEXTURE_2D:
-   case GL_TEXTURE_3D:
-   case GL_TEXTURE_CUBE_MAP:
-      if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
-         /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
-          */
-         firstLevel = lastLevel = tObj->BaseLevel;
-      }
-      else {
-	 /* Currently not taking min/max lod into account here, those
-	  * values are programmed as sampler state elsewhere and we
-	  * upload the same mipmap levels regardless.  Not sure if
-	  * this makes sense as it means it isn't possible for the app
-	  * to use min/max lod to reduce texture memory pressure:
-	  */
-	 firstLevel = tObj->BaseLevel;
-	 lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, 
-			  tObj->MaxLevel);
-	 lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-      }
-      break;
-   case GL_TEXTURE_RECTANGLE_NV:
-   case GL_TEXTURE_4D_SGIS:
-      firstLevel = lastLevel = 0;
-      break;
-   default:
-      return;
-   }
-
-   /* save these values */
-   intelObj->firstLevel = firstLevel;
-   intelObj->lastLevel = lastLevel;
-}
-
-static GLboolean copy_image_data_to_tree( struct intel_context *intel,
-					  struct intel_texture_object *intelObj,
-					  struct gl_texture_image *texImage,
-					  GLuint face,
-					  GLuint level)
-{
-   return intel_miptree_image_data(intel,
-				   intelObj->mt,
-				   face,
-				   level,
-				   texImage->Data,
-				   texImage->RowStride,
-				   (texImage->RowStride * 
-				    texImage->Height * 
-				    texImage->TexFormat->TexelBytes));
-}
-
-static void intel_texture_invalidate( struct intel_texture_object *intelObj )
-{
-   GLint nr_faces, face;
-   intelObj->dirty = ~0;
-
-   nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   for (face = 0; face < nr_faces; face++) 
-      intelObj->dirty_images[face] = ~0;
-}
-
-#if 0
-static void intel_texture_invalidate_cb( struct intel_context *intel,
-					 void *ptr )
-{
-   intel_texture_invalidate( (struct intel_texture_object *) ptr );
-}
-#endif
-
-/*  
- */
-GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
-				   struct gl_texture_object *tObj )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   GLuint face, i;
-   GLuint nr_faces = 0;
-   struct gl_texture_image *firstImage;
-   GLuint cpp = 0;
-   
-   if( tObj == intel->frame_buffer_texobj )
-      return GL_FALSE;
-   
-   /* We know/require this is true by now: 
-    */
-   assert(intelObj->base._Complete);
-
-   /* What levels must the tree include at a minimum?
-    */
-   if (intelObj->dirty) {
-      intel_calculate_first_last_level( intelObj );
-/*       intel_miptree_destroy(intel, intelObj->mt); */
-/*       intelObj->mt = NULL; */
-   }
-
-   firstImage = intelObj->base.Image[0][intelObj->firstLevel];
-
-   /* Fallback case:
-    */
-   if (firstImage->Border) {
-      if (intelObj->mt) {
-	 intel_miptree_destroy(intel, intelObj->mt);
-	 intelObj->mt = NULL;
-	 /* Set all images dirty:
-	  */
-	 intel_texture_invalidate(intelObj);
-      }
-      return GL_FALSE;
-   }
-
-
-
-   if (firstImage->IsCompressed) {
-       cpp = intel_compressed_num_bytes(firstImage->TexFormat->MesaFormat);
-   } else {
-       cpp = firstImage->TexFormat->TexelBytes;
-   }
-       
-   /* Check tree can hold all active levels.  Check tree matches
-    * target, imageFormat, etc.
-    */
-   if (intelObj->mt &&
-       (intelObj->mt->target != intelObj->base.Target ||
-	intelObj->mt->internal_format != firstImage->InternalFormat ||
-	intelObj->mt->first_level != intelObj->firstLevel ||
-	intelObj->mt->last_level != intelObj->lastLevel ||
-	intelObj->mt->width0 != firstImage->Width ||
-	intelObj->mt->height0 != firstImage->Height ||
-	intelObj->mt->depth0 != firstImage->Depth ||
-	intelObj->mt->cpp != cpp ||
-	intelObj->mt->compressed != firstImage->IsCompressed)) 
-   {
-      intel_miptree_destroy(intel, intelObj->mt);
-      intelObj->mt = NULL;
-      
-      /* Set all images dirty:
-       */
-      intel_texture_invalidate(intelObj);
-   }
-      
-
-   /* May need to create a new tree:
-    */
-   if (!intelObj->mt) {
-      intelObj->mt = intel_miptree_create(intel,
-					  intelObj->base.Target,
-					  firstImage->InternalFormat,
-					  intelObj->firstLevel,
-					  intelObj->lastLevel,
-					  firstImage->Width,
-					  firstImage->Height,
-					  firstImage->Depth,
-					  cpp,
-					  firstImage->IsCompressed);
-#if 0
-      /* Tell the buffer manager that we will manage the backing
-       * store, but we still want it to do fencing for us.
-       */
-      bmBufferSetInvalidateCB(intel, 
-			      intelObj->mt->region->buffer,
-			      intel_texture_invalidate_cb,
-			      intelObj,
-			      GL_FALSE);
-#endif
-   }
-
-   /* Pull in any images not in the object's tree:
-    */
-   if (intelObj->dirty) {
-      nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-      for (face = 0; face < nr_faces; face++) {
-	 if (intelObj->dirty_images[face]) {
-	    for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
-	       struct gl_texture_image *texImage = intelObj->base.Image[face][i];
-
-	       /* Need to import images in main memory or held in other trees.
-		*/
-	       if (intelObj->dirty_images[face] & (1<<i) &&
-		   texImage) {
-
-		  if (INTEL_DEBUG & DEBUG_TEXTURE)
-		     _mesa_printf("copy data from image %d (%p) into object miptree\n",
-				  i,
-				  texImage->Data);
-
-		  if (!copy_image_data_to_tree(intel,
-					       intelObj,
-					       texImage,
-					       face,
-					       i))
-		     return GL_FALSE;
-
-	       }
-	    }
-	 }
-      }
-
-      /* Only clear the dirty flags if everything went ok:
-       */
-      for (face = 0; face < nr_faces; face++) {
-	 intelObj->dirty_images[face] = 0;
-      }
-
-      intelObj->dirty = 0;
-   }
-
-   return GL_TRUE;
-}
+../intel/intel_tex_validate.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 373d332f85..479c30669c 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -80,18 +80,22 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 
    if (dPriv && dPriv->numClipRects) {
       struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-      const struct intel_region *frontRegion
-	 = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
-      const struct intel_region *backRegion
-	 = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
-      const int nbox = dPriv->numClipRects;
-      const drm_clip_rect_t *pbox = dPriv->pClipRects;
-      const int cpp = frontRegion->cpp;
-      int src_pitch = backRegion->pitch * cpp;
-      int dst_pitch = frontRegion->pitch * cpp;
+      struct intel_region *src, *dst;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *pbox = dPriv->pClipRects;
+      int cpp;
+      int src_pitch, dst_pitch;
       int BR13, CMD;
       int i;
 
+      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
+      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
+
+      src_pitch = src->pitch * src->cpp;
+      dst_pitch = dst->pitch * dst->cpp;
+
+      cpp = src->cpp;
+
       ASSERT(intel_fb);
       ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
       ASSERT(frontRegion);
@@ -109,40 +113,30 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
       }
 
 #ifndef I915
-      if (backRegion->tiled) {
+      if (src->tiled) {
 	 CMD |= XY_SRC_TILED;
 	 src_pitch /= 4;
       }
-      if (frontRegion->tiled) {
+      if (dst->tiled) {
 	 CMD |= XY_DST_TILED;
 	 dst_pitch /= 4;
       }
 #endif
 
       for (i = 0; i < nbox; i++, pbox++) {
-	 drm_clip_rect_t box;
-
-	 if (pbox->x1 >= pbox->x2 ||
-	     pbox->y1 >= pbox->y2 ||
-	     pbox->x2 > intelScreen->width || pbox->y2 > intelScreen->height)
-	    continue;
-
-	 box = *pbox;
+	 drm_clip_rect_t box = *pbox;
 
 	 if (rect) {
-	    if (rect->x1 > box.x1)
-	       box.x1 = rect->x1;
-	    if (rect->y1 > box.y1)
-	       box.y1 = rect->y1;
-	    if (rect->x2 < box.x2)
-	       box.x2 = rect->x2;
-	    if (rect->y2 < box.y2)
-	       box.y2 = rect->y2;
-
-	    if (box.x1 >= box.x2 || box.y1 >= box.y2)
+	    if (!intel_intersect_cliprects(&box, &box, rect))
 	       continue;
 	 }
 
+	 if (box.x1 >= box.x2 ||
+	     box.y1 >= box.y2 ||
+	     box.x2 > intelScreen->width ||
+	     box.y2 > intelScreen->height)
+	    continue;
+
 	 assert(box.x1 < box.x2);
 	 assert(box.y1 < box.y2);
 
@@ -152,13 +146,10 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);
 
-	 OUT_RELOC(frontRegion->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-		   0);
+	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(backRegion->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		   0);
-
+	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
 	 ADVANCE_BATCH();
       }
 
@@ -178,12 +169,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 
 void
 intelEmitFillBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort dst_pitch,
-                  dri_bo *dst_buffer,
-                  GLuint dst_offset,
+		  GLuint cpp,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
 		  GLboolean dst_tiled,
-                  GLshort x, GLshort y, GLshort w, GLshort h, GLuint color)
+		  GLshort x, GLshort y,
+		  GLshort w, GLshort h,
+		  GLuint color)
 {
    GLuint BR13, CMD;
    BATCH_LOCALS;
@@ -227,7 +220,6 @@ intelEmitFillBlit(struct intel_context *intel,
    ADVANCE_BATCH();
 }
 
-
 static GLuint translate_raster_op(GLenum logicop)
 {
    switch(logicop) {
@@ -256,17 +248,17 @@ static GLuint translate_raster_op(GLenum logicop)
  */
 void
 intelEmitCopyBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort src_pitch,
-                  dri_bo *src_buffer,
-                  GLuint src_offset,
+		  GLuint cpp,
+		  GLshort src_pitch,
+		  dri_bo *src_buffer,
+		  GLuint src_offset,
 		  GLboolean src_tiled,
-                  GLshort dst_pitch,
-                  dri_bo *dst_buffer,
-                  GLuint dst_offset,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
 		  GLboolean dst_tiled,
-                  GLshort src_x, GLshort src_y,
-                  GLshort dst_x, GLshort dst_y, 
+		  GLshort src_x, GLshort src_y,
+		  GLshort dst_x, GLshort dst_y,
 		  GLshort w, GLshort h,
 		  GLenum logic_op)
 {
@@ -284,7 +276,7 @@ intelEmitCopyBlit(struct intel_context *intel,
    src_pitch *= cpp;
    dst_pitch *= cpp;
 
-   BR13 = (translate_raster_op(logic_op) << 16);
+   BR13 = translate_raster_op(logic_op) << 16;
 
    switch (cpp) {
    case 1:
@@ -316,6 +308,8 @@ intelEmitCopyBlit(struct intel_context *intel,
       return;
    }
 
+   dst_pitch &= 0xffff;
+   src_pitch &= 0xffff;
 
    /* Initial y values don't seem to work with negative pitches.  If
     * we adjust the offsets manually (below), it seems to work fine.
@@ -333,10 +327,12 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH(BR13 | dst_pitch);
       OUT_BATCH((dst_y << 16) | dst_x);
       OUT_BATCH((dst_y2 << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		dst_offset);
       OUT_BATCH((src_y << 16) | src_x);
       OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset);
+      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		src_offset);
       ADVANCE_BATCH();
    }
    else {
@@ -349,11 +345,11 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH((0 << 16) | dst_x);
       OUT_BATCH((h << 16) | dst_x2);
       OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-                dst_offset + dst_y * dst_pitch);
+		dst_offset + dst_y * dst_pitch);
       OUT_BATCH((0 << 16) | src_x);
       OUT_BATCH(src_pitch);
       OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-                src_offset + src_y * src_pitch);
+		src_offset + src_y * src_pitch);
       ADVANCE_BATCH();
    }
 }
@@ -367,7 +363,7 @@ intelEmitCopyBlit(struct intel_context *intel,
  * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
  */
 void
-intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
+intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
 {
    struct intel_context *intel = intel_context(ctx);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -375,8 +371,6 @@ intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
    GLbitfield skipBuffers = 0;
    BATCH_LOCALS;
 
-   DBG("%s %x\n", __FUNCTION__, mask);
-
    /*
     * Compute values for clearing the buffers.
     */
@@ -542,3 +536,77 @@ intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
 
    UNLOCK_HARDWARE(intel);
 }
+
+/**
+ * Emit an XY_SETUP_BLT packet (8 dwords) and an XY_TEXT_IMMEDIATE_BLIT
+ * packet (3 dwords), then copy the source bits inline into the batch.
+ * src_size is rounded up to 8 bytes; that many bytes are read from src_bits.
+ */
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  dri_bo *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op)
+{
+   int dwords = ALIGN(src_size, 8) / 4; /* inline payload size, in dwords */
+   uint32_t opcode, br13, blit_cmd;
+
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+
+   if (w < 0 || h < 0)
+      return;
+
+   dst_pitch *= cpp;
+   if (dst_tiled)
+      dst_pitch /= 4;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+
+   intel_batchbuffer_require_space( intel->batch,
+				    (8 * 4) + (3 * 4) + dwords * 4, /* bytes */
+				    INTEL_BATCH_NO_CLIPRECTS );
+
+   opcode = XY_SETUP_BLT_CMD;
+   if (cpp == 4)
+      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+   if (dst_tiled)
+      opcode |= XY_DST_TILED;
+
+   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
+   if (cpp == 2)
+      br13 |= BR13_565;
+   else
+      br13 |= BR13_8888;
+
+   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
+   if (dst_tiled)
+      blit_cmd |= XY_DST_TILED;
+
+   BEGIN_BATCH(8 + 3, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(opcode);
+   OUT_BATCH(br13);
+   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
+   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 -- TODO confirm 100x100 */
+   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_BATCH(0); /* bg */
+   OUT_BATCH(fg_color); /* fg */
+   OUT_BATCH(0); /* pattern base addr */
+
+   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
+   OUT_BATCH((y << 16) | x);
+   OUT_BATCH(((y + h) << 16) | (x + w));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_data( intel->batch, src_bits, dwords * 4,
+			   INTEL_BATCH_NO_CLIPRECTS );
+}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
index 35cc8868d9..fc0620caba 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -61,5 +61,17 @@ extern void intelEmitFillBlit(struct intel_context *intel,
                               GLshort x, GLshort y,
                               GLshort w, GLshort h, GLuint color);
 
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  dri_bo *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index d93677bde2..89b489e28a 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -135,15 +135,23 @@ intel_bufferobj_data(GLcontext * ctx,
    if (intel_obj->region)
       intel_bufferobj_release_region(intel, intel_obj);
 
+   /* While it would seem to make sense to always reallocate the buffer here,
+    * since it should allow us better concurrency between rendering and
+    * map-cpu write-unmap, doing so was a minor (~10%) performance loss
+    * for both classic and TTM mode with openarena.  That may change with
+    * improved buffer manager algorithms.
+    */
    if (intel_obj->buffer != NULL && intel_obj->buffer->size != size) {
       dri_bo_unreference(intel_obj->buffer);
       intel_obj->buffer = NULL;
    }
+   if (size != 0) {
+      if (intel_obj->buffer == NULL)
+	 intel_bufferobj_alloc_buffer(intel, intel_obj);
 
-   intel_bufferobj_alloc_buffer(intel, intel_obj);
-
-   if (data != NULL)
-      dri_bo_subdata(intel_obj->buffer, 0, size, data);
+      if (data != NULL)
+	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
+   }
 }
 
 
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index db579a8ae4..7cecc3232d 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -1,4 +1,4 @@
- /**************************************************************************
+/**************************************************************************
  * 
  * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index 44a55bbed9..78ffa3c1f8 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -29,9 +29,9 @@
 #include "intel_context.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
+#include "intel_chipset.h"
 #include "intel_depthstencil.h"
 #include "intel_fbo.h"
-#include "intel_tris.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
@@ -41,7 +41,7 @@
 #include "framebuffer.h"
 #include "swrast/swrast.h"
 #include "vblank.h"
-
+#include "i915_drm.h"
 
 /* This block can be removed when libdrm >= 2.3.1 is required */
 
@@ -59,6 +59,7 @@ typedef struct drm_i915_flip {
 
 #endif
 
+#define FILE_DEBUG_FLAG DEBUG_BLIT
 
 /**
  * XXX move this into a new dri/common/cliprects.c file.
@@ -196,6 +197,77 @@ intelSetBackClipRects(struct intel_context *intel)
    }
 }
 
+#ifdef I915
+static void
+intelUpdatePageFlipping(struct intel_context *intel,
+			GLint areaA, GLint areaB)
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   GLboolean pf_active;
+   GLint pf_planes;
+   /* Recompute the framebuffer's page-flip state.  pf_planes is a plane
+    * mask: bit 0 is set when areaA > 0, bit 1 when areaB > 0. */
+   pf_planes = 0;
+
+   if (areaA > 0)
+      pf_planes |= 1;
+
+   if (areaB > 0)
+      pf_planes |= 2;
+   /* 2-bit page index; stored pf_planes bit 1 selects a shift of 0 or 2 */
+   intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
+				(intel_fb->pf_planes & 0x2)) & 0x3;
+
+   intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
+
+   pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;
+
+   if (INTEL_DEBUG & DEBUG_LOCK)
+      if (pf_active != intel_fb->pf_active)
+	 _mesa_printf("%s - Page flipping %sactive\n", __progname,
+		      pf_active ? "" : "in");
+
+   if (pf_active) {
+      /* Sync pages between planes if flipping on both at the same time */
+      if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
+	  (intel->sarea->pf_current_page & 0x3) !=
+	  (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
+	 drm_i915_flip_t flip;
+	 /* Mismatched plane gets (page - 1) % pf_num_pages, then is flipped */
+	 if (intel_fb->pf_current_page ==
+	     (intel->sarea->pf_current_page & 0x3)) {
+	    /* XXX: This is ugly, but emitting two flips 'in a row' can cause
+	     * lockups for unknown reasons.
+	     */
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & 0x3;
+	    intel->sarea->pf_current_page |=
+	       ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+		intel_fb->pf_num_pages) << 2;
+
+	    flip.pipes = 0x2;
+	 } else {
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & (0x3 << 2);
+	    intel->sarea->pf_current_page |=
+	       (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+	       intel_fb->pf_num_pages;
+
+	    flip.pipes = 0x1;
+	 }
+
+	 drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
+      }
+
+      intel_fb->pf_planes = pf_planes;
+   }
+
+   intel_fb->pf_active = pf_active;
+   intel_flip_renderbuffers(intel_fb);
+   intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+}
+#endif /* I915 */
 
 /**
  * This will be called whenever the currently bound window is moved/resized.
@@ -232,7 +304,7 @@ intelWindowMoved(struct intel_context *intel)
    }
 
    if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
-      drmI830Sarea *sarea = intel->sarea;
+      volatile drmI830Sarea *sarea = intel->sarea;
       drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
 				   .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
       drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
@@ -244,69 +316,10 @@ intelWindowMoved(struct intel_context *intel)
       GLint areaA = driIntersectArea( drw_rect, planeA_rect );
       GLint areaB = driIntersectArea( drw_rect, planeB_rect );
       GLuint flags = dPriv->vblFlags;
-      GLboolean pf_active;
-      GLint pf_planes;
-
-      /* Update page flipping info
-       */
-      pf_planes = 0;
-
-      if (areaA > 0)
-	 pf_planes |= 1;
-
-      if (areaB > 0)
-	 pf_planes |= 2;
-
-      intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
-				   (intel_fb->pf_planes & 0x2)) & 0x3;
-
-      intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
-
-      pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;
-
-      if (INTEL_DEBUG & DEBUG_LOCK)
-	 if (pf_active != intel_fb->pf_active)
-	    _mesa_printf("%s - Page flipping %sactive\n", __progname,
-			 pf_active ? "" : "in");
-
-      if (pf_active) {
-	 /* Sync pages between planes if flipping on both at the same time */
-	 if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
-	     (intel->sarea->pf_current_page & 0x3) !=
-	     (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
-	    drm_i915_flip_t flip;
-
-	    if (intel_fb->pf_current_page ==
-		(intel->sarea->pf_current_page & 0x3)) {
-	       /* XXX: This is ugly, but emitting two flips 'in a row' can cause
-		* lockups for unknown reasons.
-		*/
-               intel->sarea->pf_current_page =
-		  intel->sarea->pf_current_page & 0x3;
-	       intel->sarea->pf_current_page |=
-		  ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
-		   intel_fb->pf_num_pages) << 2;
-
-	       flip.pipes = 0x2;
-	    } else {
-               intel->sarea->pf_current_page =
-		  intel->sarea->pf_current_page & (0x3 << 2);
-	       intel->sarea->pf_current_page |=
-		  (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
-		  intel_fb->pf_num_pages;
-
-	       flip.pipes = 0x1;
-	    }
-
-	    drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
-	 }
-
-	 intel_fb->pf_planes = pf_planes;
-      }
 
-      intel_fb->pf_active = pf_active;
-      intel_flip_renderbuffers(intel_fb);
-      intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+#ifdef I915
+      intelUpdatePageFlipping(intel, areaA, areaB);
+#endif
 
       /* Update vblank info
        */
@@ -370,11 +383,14 @@ intelWindowMoved(struct intel_context *intel)
    intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */
 
    /* Update hardware scissor */
-   ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-                       ctx->Scissor.Width, ctx->Scissor.Height);
+   if (ctx->Driver.Scissor != NULL) {
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   }
 
    /* Re-calculate viewport related state */
-   ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
+   if (ctx->Driver.DepthRange != NULL)
+      ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
 }
 
 
@@ -389,9 +405,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLuint buf;
 
-   if (INTEL_DEBUG & DEBUG_BLIT)
-      _mesa_printf("%s 0x%x\n", __FUNCTION__, mask);
-
    intel->vtbl.install_meta_state(intel);
 
    /* Back and stencil cliprects are the same.  Try and do both
@@ -402,8 +415,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
 	 intel_get_rb_region(fb, BUFFER_BACK_LEFT);
       struct intel_region *depthRegion =
 	 intel_get_rb_region(fb, BUFFER_DEPTH);
-      const GLuint clearColor = (backRegion && backRegion->cpp == 4)
-	 ? intel->ClearColor8888 : intel->ClearColor565;
 
       intel->vtbl.meta_draw_region(intel, backRegion, depthRegion);
 
@@ -424,13 +435,14 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
       else
 	 intel->vtbl.meta_no_depth_write(intel);
 
-      intel_meta_draw_quad(intel,
-			   fb->_Xmin,
-			   fb->_Xmax,
-			   fb->_Ymin,
-			   fb->_Ymax,
-			   intel->ctx.Depth.Clear, clearColor,
-			   0, 0, 0, 0);   /* texcoords */
+      intel->vtbl.meta_draw_quad(intel,
+				 fb->_Xmin,
+				 fb->_Xmax,
+				 fb->_Ymin,
+				 fb->_Ymax,
+				 intel->ctx.Depth.Clear,
+				 intel->ClearColor8888,
+				 0, 0, 0, 0);   /* texcoords */
 
       mask &= ~(BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH);
    }
@@ -441,8 +453,6 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
       if (mask & bufBit) {
 	 struct intel_renderbuffer *irbColor =
 	    intel_renderbuffer(fb->Attachment[buf].Renderbuffer);
-	 GLuint color = (irbColor->region->cpp == 4)
-	    ? intel->ClearColor8888 : intel->ClearColor565;
 
 	 ASSERT(irbColor);
 
@@ -454,13 +464,13 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
 	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
 	  * drawing origin may not be correctly emitted.
 	  */
-	 intel_meta_draw_quad(intel,
-			      fb->_Xmin,
-			      fb->_Xmax,
-			      fb->_Ymin,
-			      fb->_Ymax,
-			      0, color,
-			      0, 0, 0, 0);   /* texcoords */
+	 intel->vtbl.meta_draw_quad(intel,
+				    fb->_Xmin,
+				    fb->_Xmax,
+				    fb->_Ymin,
+				    fb->_Ymax,
+				    0, intel->ClearColor8888,
+				    0, 0, 0, 0);   /* texcoords */
 
 	 mask &= ~bufBit;
       }
@@ -470,6 +480,28 @@ intelClearWithTris(struct intel_context *intel, GLbitfield mask)
    intel_batchbuffer_flush(intel->batch);
 }
 
+static const char *buffer_names[] = {
+   [BUFFER_FRONT_LEFT] = "front",
+   [BUFFER_BACK_LEFT] = "back",
+   [BUFFER_FRONT_RIGHT] = "front right",
+   [BUFFER_BACK_RIGHT] = "back right",
+   [BUFFER_AUX0] = "aux0",
+   [BUFFER_AUX1] = "aux1",
+   [BUFFER_AUX2] = "aux2",
+   [BUFFER_AUX3] = "aux3",
+   [BUFFER_DEPTH] = "depth",
+   [BUFFER_STENCIL] = "stencil",
+   [BUFFER_ACCUM] = "accum",
+   [BUFFER_COLOR0] = "color0",
+   [BUFFER_COLOR1] = "color1",
+   [BUFFER_COLOR2] = "color2",
+   [BUFFER_COLOR3] = "color3",
+   [BUFFER_COLOR4] = "color4",
+   [BUFFER_COLOR5] = "color5",
+   [BUFFER_COLOR6] = "color6",
+   [BUFFER_COLOR7] = "color7",
+};
+
 /**
  * Called by ctx->Driver.Clear.
  */
@@ -504,8 +536,12 @@ intelClear(GLcontext *ctx, GLbitfield mask)
          = intel_get_rb_region(fb, BUFFER_STENCIL);
       if (stencilRegion) {
          /* have hw stencil */
-         if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
-            /* not clearing all stencil bits, so use triangle clearing */
+         if (IS_965(intel->intelScreen->deviceID) ||
+	     (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+	    /* We have to use the 3D engine if we're clearing a partial mask
+	     * of the stencil buffer, or if we're on a 965 which has a tiled
+	     * depth/stencil buffer in a layout we can't blit to.
+	     */
             tri_mask |= BUFFER_BIT_STENCIL;
          }
          else {
@@ -518,7 +554,8 @@ intelClear(GLcontext *ctx, GLbitfield mask)
    /* HW depth */
    if (mask & BUFFER_BIT_DEPTH) {
       /* clear depth with whatever method is used for stencil (see above) */
-      if (tri_mask & BUFFER_BIT_STENCIL)
+      if (IS_965(intel->intelScreen->deviceID) ||
+	  tri_mask & BUFFER_BIT_STENCIL)
          tri_mask |= BUFFER_BIT_DEPTH;
       else
          blit_mask |= BUFFER_BIT_DEPTH;
@@ -538,17 +575,43 @@ intelClear(GLcontext *ctx, GLbitfield mask)
       }
    }
 
-
    intelFlush(ctx);             /* XXX intelClearWithBlit also does this */
 
-   if (blit_mask)
+   if (blit_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("blit clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (blit_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       intelClearWithBlit(ctx, blit_mask);
+   }
 
-   if (tri_mask)
+   if (tri_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("tri clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (tri_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       intelClearWithTris(intel, tri_mask);
+   }
 
-   if (swrast_mask)
+   if (swrast_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("swrast clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (swrast_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
       _swrast_Clear(ctx, swrast_mask);
+   }
 }
 
 
@@ -564,7 +627,8 @@ intel_wait_flips(struct intel_context *intel, GLuint batch_flags)
 			     BUFFER_BIT_FRONT_LEFT ? BUFFER_FRONT_LEFT :
 			     BUFFER_BACK_LEFT);
 
-   if (intel_fb->Base.Name == 0 && intel_rb->pf_pending == intel_fb->pf_seq) {
+   if (intel_fb->Base.Name == 0 && intel_rb &&
+       intel_rb->pf_pending == intel_fb->pf_seq) {
       GLint pf_planes = intel_fb->pf_planes;
       BATCH_LOCALS;
 
@@ -586,6 +650,7 @@ intel_wait_flips(struct intel_context *intel, GLuint batch_flags)
 static GLboolean
 intelPageFlip(const __DRIdrawablePrivate * dPriv)
 {
+#ifdef I915
    struct intel_context *intel;
    int ret;
    struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
@@ -638,6 +703,9 @@ intelPageFlip(const __DRIdrawablePrivate * dPriv)
    intel_draw_buffer(&intel->ctx, &intel_fb->Base);
 
    return GL_TRUE;
+#else
+   return GL_FALSE;
+#endif
 }
 
 #if 0
@@ -863,16 +931,14 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
    /*
     * How many color buffers are we drawing into?
     */
-   if (fb->_NumColorDrawBuffers[0] != 1
-#if 0
-       /* XXX FBO temporary - always use software rendering */
-       || 1
-#endif
-      ) {
+   if (fb->_NumColorDrawBuffers[0] != 1) {
       /* writing to 0 or 2 or 4 color buffers */
       /*_mesa_debug(ctx, "Software rendering\n");*/
       FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
-      front = 1;                /* might not have back color buffer */
+      colorRegion = NULL;
+
+      if (fb->Name != 0)
+	 intelSetRenderbufferClipRects(intel);
    }
    else {
       /* draw to exactly one color buffer */
@@ -881,30 +947,30 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
       if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
          front = 1;
       }
-   }
 
-   /*
-    * Get the intel_renderbuffer for the colorbuffer we're drawing into.
-    * And set up cliprects.
-    */
-   if (fb->Name == 0) {
-      /* drawing to window system buffer */
-      if (front) {
-         intelSetFrontClipRects(intel);
-         colorRegion = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+      /*
+       * Get the intel_renderbuffer for the colorbuffer we're drawing into.
+       * And set up cliprects.
+       */
+      if (fb->Name == 0) {
+	 /* drawing to window system buffer */
+	 if (front) {
+	    intelSetFrontClipRects(intel);
+	    colorRegion = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+	 }
+	 else {
+	    intelSetBackClipRects(intel);
+	    colorRegion = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+	 }
       }
       else {
-         intelSetBackClipRects(intel);
-         colorRegion = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+	 /* drawing to user-created FBO */
+	 struct intel_renderbuffer *irb;
+	 intelSetRenderbufferClipRects(intel);
+	 irb = intel_renderbuffer(fb->_ColorDrawBuffers[0][0]);
+	 colorRegion = (irb && irb->region) ? irb->region : NULL;
       }
    }
-   else {
-      /* drawing to user-created FBO */
-      struct intel_renderbuffer *irb;
-      intelSetRenderbufferClipRects(intel);
-      irb = intel_renderbuffer(fb->_ColorDrawBuffers[0][0]);
-      colorRegion = (irb && irb->region) ? irb->region : NULL;
-   }
 
    /* Update culling direction which changes depending on the
     * orientation of the buffer:
@@ -953,7 +1019,10 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
          ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
          FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
          /* need to re-compute stencil hw state */
-         ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+	 if (ctx->Driver.Enable != NULL)
+	    ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+	 else
+	    ctx->NewState |= _NEW_STENCIL;
          if (!depthRegion)
             depthRegion = irbStencil->region;
       }
@@ -965,42 +1034,46 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
       /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */
       FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
       /* need to re-compute stencil hw state */
-      ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+      if (ctx->Driver.Enable != NULL)
+	 ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+      else
+	 ctx->NewState |= _NEW_STENCIL;
    }
 
    /*
     * Update depth test state
     */
-   if (ctx->Depth.Test && fb->Visual.depthBits > 0) {
-      ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE);
-   }
-   else {
-      ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE);
-   }
-
-   /**
-    ** Release old regions, reference new regions
-    **/
-#if 0                           /* XXX FBO: this seems to be redundant with i915_state_draw_region() */
-   if (intel->draw_region != colorRegion) {
-      intel_region_release(&intel->draw_region);
-      intel_region_reference(&intel->draw_region, colorRegion);
-   }
-   if (intel->intelScreen->depth_region != depthRegion) {
-      intel_region_release(&intel->intelScreen->depth_region);
-      intel_region_reference(&intel->intelScreen->depth_region, depthRegion);
+   if (ctx->Driver.Enable) {
+      if (ctx->Depth.Test && fb->Visual.depthBits > 0) {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      } else {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE);
+      }
+   } else {
+      ctx->NewState |= _NEW_DEPTH;
    }
-#endif
 
    intel->vtbl.set_draw_region(intel, colorRegion, depthRegion);
 
    /* update viewport since it depends on window size */
-   ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
-                        ctx->Viewport.Width, ctx->Viewport.Height);
+   if (ctx->Driver.Viewport) {
+      ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+			   ctx->Viewport.Width, ctx->Viewport.Height);
+   } else {
+      ctx->NewState |= _NEW_VIEWPORT;
+   }
 
-   /* Update hardware scissor */
-   ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-                       ctx->Scissor.Width, ctx->Scissor.Height);
+   /* Set state we know depends on drawable parameters:
+    */
+   if (ctx->Driver.Scissor)
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   intel->NewGLState |= _NEW_SCISSOR;
+
+   if (ctx->Driver.DepthRange)
+      ctx->Driver.DepthRange(ctx,
+			     ctx->Viewport.Near,
+			     ctx->Viewport.Far);
 }
 
 
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
index 2ec0241af4..b2aa056b90 100644
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@@ -48,138 +48,138 @@
 
 #include "intel_bufmgr_ttm.h"
 
-#define BUFMGR_DEBUG 0
-
-struct intel_reloc_info
-{
-    GLuint type;
-    GLuint reloc;
-    GLuint delta;
-    GLuint index;
-    drm_handle_t handle;
-};
-
-struct intel_bo_node
-{
-    drmMMListHead head;
-    drmBO *buf;
-    struct drm_i915_op_arg bo_arg;
-    uint64_t flags;
-    uint64_t mask;
-    void (*destroy)(void *);
-    void *priv;
-};
-
-struct intel_bo_reloc_list
-{
-    drmMMListHead head;
-    drmBO buf;
-    uint32_t *relocs;
-};
-
-struct intel_bo_reloc_node
-{
-    drmMMListHead head;
-    drm_handle_t handle;
-    uint32_t nr_reloc_types;
-    struct intel_bo_reloc_list type_list;
-};
+#define DBG(...) do {					\
+   if (bufmgr_ttm->bufmgr.debug)			\
+      _mesa_printf(__VA_ARGS__);			\
+} while (0)
 
+/* Buffer validation list */
 struct intel_bo_list {
     unsigned numCurrent;
     drmMMListHead list;
-    void (*destroy)(void *node);
 };
 
 typedef struct _dri_bufmgr_ttm {
     dri_bufmgr bufmgr;
 
     int fd;
-    _glthread_Mutex mutex;
     unsigned int fence_type;
     unsigned int fence_type_flush;
 
     uint32_t max_relocs;
-    /** ttm relocation list */
-    struct intel_bo_list list;
-    struct intel_bo_list reloc_list;
-
+    struct intel_bo_list list; /* list of buffers to be validated */
 } dri_bufmgr_ttm;
 
 typedef struct _dri_bo_ttm {
     dri_bo bo;
 
-    int refcount;		/* Protected by bufmgr->mutex */
+    int refcount;
     drmBO drm_bo;
     const char *name;
+
+    /** DRM buffer object containing relocation list */
+    drmBO *reloc_buf;
+    uint32_t *relocs;
 } dri_bo_ttm;
 
 typedef struct _dri_fence_ttm
 {
     dri_fence fence;
 
-    int refcount;		/* Protected by bufmgr->mutex */
+    int refcount;
     const char *name;
     drmFence drm_fence;
 } dri_fence_ttm;
 
+/* Validation list node */
+struct intel_bo_node
+{
+    drmMMListHead head;
+    dri_bo *bo;
+    struct drm_i915_op_arg bo_arg;
+    uint64_t flags;
+    uint64_t mask;
+};
 
 static void
-intel_bo_free_list(struct intel_bo_list *list)
+intel_init_validate_list(struct intel_bo_list *list)
 {
-    struct intel_bo_node *node;
+    DRMINITLISTHEAD(&list->list);
+    list->numCurrent = 0;
+}
+
+/**
+ * Empties the validation list and clears the relocations.
+ */
+static void
+intel_free_validate_list(dri_bufmgr_ttm *bufmgr_ttm)
+{
+    struct intel_bo_list *list = &bufmgr_ttm->list;
     drmMMListHead *l;
 
-    l = list->list.next;
-    while(l != &list->list) {
+    for (l = list->list.next; l != &list->list; l = list->list.next) {
+        struct intel_bo_node *node =
+	   DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo;
+
 	DRMLISTDEL(l);
-	node = DRMLISTENTRY(struct intel_bo_node, l, head);
-	list->destroy(node);
-	l = list->list.next;
+
+	/* Clear relocation list */
+	if (bo_ttm->relocs != NULL)
+	   bo_ttm->relocs[0] = bo_ttm->relocs[0] & ~0xffff;
+
+	dri_bo_unreference(node->bo);
+
+	drmFree(node);
 	list->numCurrent--;
     }
 }
 
-static void
-generic_destroy(void *nodep)
+static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
 {
-    free(nodep);
-}
+    struct intel_bo_list *list = &bufmgr_ttm->list;
+    drmMMListHead *l;
+    int i = 0;
 
-static int
-intel_create_bo_list(int numTarget, struct intel_bo_list *list,
-		     void (*destroy)(void *))
-{
-    DRMINITLISTHEAD(&list->list);
-    list->numCurrent = 0;
-    if (destroy)
-        list->destroy = destroy;
-    else
-        list->destroy = generic_destroy;
-    return 0;
+    for (l = list->list.next; l != &list->list; l = l->next) {
+	int j;
+        struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)node->bo;
+
+	if (bo_ttm->relocs != NULL) {
+	    for (j = 0; j < (bo_ttm->relocs[0] & 0xffff); j++) {
+		uint32_t *reloc_entry = bo_ttm->relocs + I915_RELOC_HEADER +
+		    j * I915_RELOC0_STRIDE;
+
+		DBG("%2d: %s@0x%08x -> %d + 0x%08x\n",
+		    i, bo_ttm->name,
+		    reloc_entry[0], reloc_entry[2], reloc_entry[1]);
+	    }
+	} else {
+	    DBG("%2d: %s\n", i, bo_ttm->name);
+	}
+	i++;
+    }
 }
 
-
 static struct drm_i915_op_arg *
 intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
 {
     struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_node *node;
-    struct intel_bo_reloc_node *rl_node;
-    drmMMListHead *l, *rl;
-    struct drm_i915_op_arg *arg, *first;
-    struct drm_bo_op_req *req;
+    drmMMListHead *l;
+    struct drm_i915_op_arg *first;
     uint64_t *prevNext = NULL;
     GLuint count = 0;
 
     first = NULL;
 
     for (l = list->list.next; l != &list->list; l = l->next) {
-        node = DRMLISTENTRY(struct intel_bo_node, l, head);
-
-        arg = &node->bo_arg;
-        req = &arg->d.req;
+        struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+	dri_bo_ttm *ttm_buf = (dri_bo_ttm *)node->bo;
+	struct drm_i915_op_arg *arg = &node->bo_arg;
+	struct drm_bo_op_req *req = &arg->d.req;
 
         if (!first)
             first = arg;
@@ -189,27 +189,22 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
 
 	memset(arg, 0, sizeof(*arg));
 	prevNext = &arg->next;
-	req->bo_req.handle = node->buf->handle;
+	req->bo_req.handle = ttm_buf->drm_bo.handle;
 	req->op = drm_bo_validate;
 	req->bo_req.flags = node->flags;
 	req->bo_req.hint = 0;
 #ifdef DRM_BO_HINT_PRESUMED_OFFSET
 	req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
-	req->bo_req.presumed_offset = ((dri_bo *) node->priv)->offset;
+	req->bo_req.presumed_offset = node->bo->offset;
 #endif
 	req->bo_req.mask = node->mask;
 	req->bo_req.fence_class = 0; /* Backwards compat. */
-	arg->reloc_handle = 0;
 
-	for (rl = reloc_list->list.next; rl != &reloc_list->list;
-	     rl = rl->next)
-	{
-	    rl_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
+	if (ttm_buf->reloc_buf != NULL)
+	    arg->reloc_handle = ttm_buf->reloc_buf->handle;
+	else
+	    arg->reloc_handle = 0;
 
-	    if (rl_node->handle == node->buf->handle) {
-		arg->reloc_handle = rl_node->type_list.buf.handle;
-	    }
-	}
 	count++;
     }
 
@@ -220,46 +215,6 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
     return first;
 }
 
-static void
-intel_free_validate_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
-    struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_node *node;
-    drmMMListHead *l;
-
-    for (l = list->list.next; l != &list->list; l = l->next) {
-        node = DRMLISTENTRY(struct intel_bo_node, l, head);
-
-	if (node->destroy)
-	    (*node->destroy)(node->priv);
-
-    }
-}
-
-static void
-intel_free_reloc_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_reloc_node *reloc_node;
-    drmMMListHead *rl, *tmp;
-
-    for (rl = reloc_list->list.next, tmp = rl->next; rl != &reloc_list->list;
-	 rl = tmp, tmp = rl->next)
-    {
-	reloc_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
-
-	DRMLISTDEL(rl);
-
-	if (reloc_node->nr_reloc_types > 1) {
-	    /* TODO */
-	}
-
-	drmBOUnmap(bufmgr_ttm->fd, &reloc_node->type_list.buf);
-	drmBOUnreference(bufmgr_ttm->fd, &reloc_node->type_list.buf);
-	free(reloc_node);
-    }
-}
-
 /**
  * Adds the given buffer to the list of buffers to be validated (moved into the
  * appropriate memory type) with the next batch submission.
@@ -268,24 +223,26 @@ intel_free_reloc_list(dri_bufmgr_ttm *bufmgr_ttm)
  * with the intersection of the memory type flags and the union of the
  * remaining flags.
  */
-static int
+static struct intel_bo_node *
 intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 			  dri_bo *buf,
 			  uint64_t flags, uint64_t mask,
-			  int *itemLoc, void (*destroy_cb)(void *))
+			  int *itemLoc)
 {
     struct intel_bo_list *list = &bufmgr_ttm->list;
-    struct intel_bo_node *node, *cur;
+    struct intel_bo_node *cur;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
     drmMMListHead *l;
     int count = 0;
     int ret = 0;
-    drmBO *buf_bo = &((dri_bo_ttm *)buf)->drm_bo;
     cur = NULL;
 
     /* Find the buffer in the validation list if it's already there. */
     for (l = list->list.next; l != &list->list; l = l->next) {
-	node = DRMLISTENTRY(struct intel_bo_node, l, head);
-	if (node->buf->handle == buf_bo->handle) {
+	struct intel_bo_node *node =
+	    DRMLISTENTRY(struct intel_bo_node, l, head);
+
+	if (((dri_bo_ttm *)node->bo)->drm_bo.handle == ttm_buf->drm_bo.handle) {
 	    cur = node;
 	    break;
 	}
@@ -295,13 +252,12 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
     if (!cur) {
 	cur = drmMalloc(sizeof(*cur));
 	if (!cur) {
-	    return -ENOMEM;
+	    return NULL;
 	}
-	cur->buf = buf_bo;
-	cur->priv = buf;
+	cur->bo = buf;
+	dri_bo_reference(buf);
 	cur->flags = flags;
 	cur->mask = mask;
-	cur->destroy = destroy_cb;
 	ret = 1;
 
 	DRMLISTADDTAIL(&cur->head, &list->list);
@@ -314,21 +270,22 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 		    "%s: No shared memory types between "
 		    "0x%16llx and 0x%16llx\n",
 		    __FUNCTION__, cur->flags, flags);
-	    return -EINVAL;
+	    return NULL;
 	}
 	if (mask & cur->mask & ~DRM_BO_MASK_MEM  & (cur->flags ^ flags)) {
 	    fprintf(stderr,
 		    "%s: Incompatible flags between 0x%16llx and 0x%16llx "
 		    "(0x%16llx, 0x%16llx masks)\n",
 		    __FUNCTION__, cur->flags, flags, cur->mask, mask);
-	    return -EINVAL;
+	    return NULL;
 	}
 	cur->mask |= mask;
 	cur->flags = memFlags | ((cur->flags | flags) &
 				cur->mask & ~DRM_BO_MASK_MEM);
     }
     *itemLoc = count;
-    return ret;
+
+    return cur;
 }
 
 
@@ -336,132 +293,58 @@ intel_add_validate_buffer(dri_bufmgr_ttm *bufmgr_ttm,
 	sizeof(uint32_t))
 
 static int
-intel_create_new_reloc_type_list(dri_bufmgr_ttm *bufmgr_ttm,
-				 struct intel_bo_reloc_list *cur_type)
+intel_setup_reloc_list(dri_bo *bo)
 {
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
     int ret;
 
-    /* should allocate a drmBO here */
-    ret = drmBOCreate(bufmgr_ttm->fd, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs), 0,
+    /* If the buffer exists, then it was just created, or it was reinitialized
+     * at the last intel_free_validate_list().
+     */
+    if (bo_ttm->reloc_buf != NULL)
+       return 0;
+
+    bo_ttm->reloc_buf = malloc(sizeof(bo_ttm->drm_bo));
+
+    ret = drmBOCreate(bufmgr_ttm->fd,
+		      RELOC_BUF_SIZE(bufmgr_ttm->max_relocs), 0,
 		      NULL,
 		      DRM_BO_FLAG_MEM_LOCAL |
 		      DRM_BO_FLAG_READ |
 		      DRM_BO_FLAG_WRITE |
 		      DRM_BO_FLAG_MAPPABLE |
 		      DRM_BO_FLAG_CACHED,
-		      0, &cur_type->buf);
+		      0, bo_ttm->reloc_buf);
     if (ret) {
-	fprintf(stderr, "Failed to create relocation BO: %s\n",
-		strerror(-ret));
-	return ret;
+       fprintf(stderr, "Failed to create relocation BO: %s\n",
+	       strerror(-ret));
+       return ret;
     }
 
-    ret = drmBOMap(bufmgr_ttm->fd, &cur_type->buf,
+    ret = drmBOMap(bufmgr_ttm->fd, bo_ttm->reloc_buf,
 		   DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
-		   0, (void **)&cur_type->relocs);
+		   0, (void **)&bo_ttm->relocs);
     if (ret) {
-	fprintf(stderr, "Failed to map relocation BO: %s\n", strerror(-ret));
-	return ret;
-    }
-    return 0;
-}
-
-/**
- * Adds the relocation @reloc_info to the relocation list.
- */
-static int
-intel_add_validate_reloc(dri_bufmgr_ttm *bufmgr_ttm,
-			 struct intel_reloc_info *reloc_info)
-{
-    struct intel_bo_list *reloc_list = &bufmgr_ttm->reloc_list;
-    struct intel_bo_reloc_node *rl_node, *cur;
-    drmMMListHead *rl, *l;
-    int ret = 0;
-    uint32_t *reloc_start;
-    int num_relocs;
-    struct intel_bo_reloc_list *cur_type;
-
-    cur = NULL;
-
-    for (rl = reloc_list->list.next; rl != &reloc_list->list; rl = rl->next) {
-	rl_node = DRMLISTENTRY(struct intel_bo_reloc_node, rl, head);
-	if (rl_node->handle == reloc_info->handle) {
-	    cur = rl_node;
-	    break;
-	}
-    }
-
-    if (!cur) {
-
-	cur = malloc(sizeof(*cur));
-	if (!cur)
-	    return -ENOMEM;
-
-	cur->nr_reloc_types = 1;
-	cur->handle = reloc_info->handle;
-	cur_type = &cur->type_list;
-
-	DRMINITLISTHEAD(&cur->type_list.head);
-	ret = intel_create_new_reloc_type_list(bufmgr_ttm, cur_type);
-	if (ret) {
-	    return -1;
-	}
-	DRMLISTADDTAIL(&cur->head, &reloc_list->list);
-
-	cur_type->relocs[0] = 0 | (reloc_info->type << 16);
-	cur_type->relocs[1] = 0; // next reloc buffer handle is 0
-
-    } else {
-	int found = 0;
-	if ((cur->type_list.relocs[0] >> 16) == reloc_info->type) {
-		cur_type = &cur->type_list;
-		found = 1;
-	} else {
-	    for (l = cur->type_list.head.next; l != &cur->type_list.head;
-		 l = l->next)
-	    {
-	        cur_type = DRMLISTENTRY(struct intel_bo_reloc_list, l, head);
-	        if (((cur_type->relocs[0] >> 16) & 0xffff) == reloc_info->type)
-	    	    found = 1;
-		break;
-	    }
-        }
-
-	/* didn't find the relocation type */
-	if (!found) {
-	    cur_type = malloc(sizeof(*cur_type));
-	    if (!cur_type) {
-		return -ENOMEM;
-	    }
-
-	    ret = intel_create_new_reloc_type_list(bufmgr_ttm, cur_type);
-	    DRMLISTADDTAIL(&cur_type->head, &cur->type_list.head);
-
-	    cur_type->relocs[0] = (reloc_info->type << 16);
-	    cur_type->relocs[1] = 0;
-
-	    cur->nr_reloc_types++;
-	}
+       fprintf(stderr, "Failed to map relocation BO: %s\n",
+	       strerror(-ret));
+       return ret;
     }
 
-    reloc_start = cur_type->relocs;
-
-    num_relocs = (reloc_start[0] & 0xffff);
+    /* Initialize the relocation list with the header:
+     * DWORD 0: relocation type, relocation count
+     * DWORD 1: handle to next relocation list (currently none)
+     * DWORD 2: unused
+     * DWORD 3: unused
+     */
+    bo_ttm->relocs[0] = I915_RELOC_TYPE_0 << 16;
+    bo_ttm->relocs[1] = 0;
+    bo_ttm->relocs[2] = 0;
+    bo_ttm->relocs[3] = 0;
 
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER] =
-       reloc_info->reloc;
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER + 1] =
-       reloc_info->delta;
-    reloc_start[num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER + 2] =
-       reloc_info->index;
-    reloc_start[0]++;
-    if (((reloc_start[0] & 0xffff)) > (bufmgr_ttm->max_relocs)) {
-	return -ENOMEM;
-    }
     return 0;
 }
 
-
 #if 0
 int
 driFenceSignaled(DriFenceObject * fence, unsigned type)
@@ -472,9 +355,7 @@ driFenceSignaled(DriFenceObject * fence, unsigned type)
     if (fence == NULL)
 	return GL_TRUE;
 
-    _glthread_LOCK_MUTEX(fence->mutex);
     ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
-    _glthread_UNLOCK_MUTEX(fence->mutex);
     BM_CKFATAL(ret);
     return signaled;
 }
@@ -485,14 +366,12 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
 	      unsigned long size, unsigned int alignment,
 	      uint64_t location_mask)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_bo_ttm *ttm_buf;
     unsigned int pageSize = getpagesize();
     int ret;
     unsigned int flags, hint;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_buf = malloc(sizeof(*ttm_buf));
     if (!ttm_buf)
 	return NULL;
@@ -506,7 +385,7 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
     /* No hints we want to use. */
     hint = 0;
 
-    ret = drmBOCreate(ttm_bufmgr->fd, size, alignment / pageSize,
+    ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize,
 		      NULL, flags, hint, &ttm_buf->drm_bo);
     if (ret != 0) {
 	free(ttm_buf);
@@ -518,10 +397,10 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
     ttm_buf->bo.bufmgr = bufmgr;
     ttm_buf->name = name;
     ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf = NULL;
+    ttm_buf->relocs = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_create: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_create: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return &ttm_buf->bo;
 }
@@ -548,17 +427,15 @@ dri_bo *
 intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
 			      unsigned int handle)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_bo_ttm *ttm_buf;
     int ret;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_buf = malloc(sizeof(*ttm_buf));
     if (!ttm_buf)
 	return NULL;
 
-    ret = drmBOReference(ttm_bufmgr->fd, handle, &ttm_buf->drm_bo);
+    ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
     if (ret != 0) {
        fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
 	       name, handle, strerror(-ret));
@@ -571,11 +448,11 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
     ttm_buf->bo.bufmgr = bufmgr;
     ttm_buf->name = name;
     ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf = NULL;
+    ttm_buf->relocs = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_create_from_handle: %p %08x (%s)\n",
-	    &ttm_buf->bo, handle, ttm_buf->name);
-#endif
+    DBG("bo_create_from_handle: %p %08x (%s)\n",
+	&ttm_buf->bo, handle, ttm_buf->name);
 
     return &ttm_buf->bo;
 }
@@ -583,12 +460,9 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
 static void
 dri_ttm_bo_reference(dri_bo *buf)
 {
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
     dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ttm_buf->refcount++;
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static void
@@ -600,24 +474,25 @@ dri_ttm_bo_unreference(dri_bo *buf)
     if (!buf)
 	return;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     if (--ttm_buf->refcount == 0) {
 	int ret;
 
+	if (ttm_buf->reloc_buf) {
+	    drmBOUnmap(bufmgr_ttm->fd, ttm_buf->reloc_buf);
+	    drmBOUnreference(bufmgr_ttm->fd, ttm_buf->reloc_buf);
+	    free(ttm_buf->reloc_buf);
+	}
+
 	ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
 	if (ret != 0) {
 	    fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
 		    ttm_buf->name, strerror(-ret));
 	}
-#if BUFMGR_DEBUG
-	fprintf(stderr, "bo_unreference final: %p (%s)\n",
-		&ttm_buf->bo, ttm_buf->name);
-#endif
-	_glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
+	DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
 	free(buf);
 	return;
     }
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static int
@@ -635,9 +510,7 @@ dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
 
     assert(buf->virtual == NULL);
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
 }
@@ -657,9 +530,7 @@ dri_ttm_bo_unmap(dri_bo *buf)
 
     buf->virtual = NULL;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-#endif
+    DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
 
     return drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
 }
@@ -674,11 +545,9 @@ dri_fence *
 intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
 				drm_fence_arg_t *arg)
 {
-    dri_bufmgr_ttm *ttm_bufmgr;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
     dri_fence_ttm *ttm_fence;
 
-    ttm_bufmgr = (dri_bufmgr_ttm *)bufmgr;
-
     ttm_fence = malloc(sizeof(*ttm_fence));
     if (!ttm_fence)
 	return NULL;
@@ -694,10 +563,8 @@ intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
     ttm_fence->name = name;
     ttm_fence->refcount = 1;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_create_from_handle: %p (%s)\n", &ttm_fence->fence,
-	    ttm_fence->name);
-#endif
+    DBG("fence_create_from_handle: %p (%s)\n",
+	&ttm_fence->fence, ttm_fence->name);
 
     return &ttm_fence->fence;
 }
@@ -709,13 +576,8 @@ dri_ttm_fence_reference(dri_fence *fence)
     dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ++fence_ttm->refcount;
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_reference: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
+    DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
 }
 
 static void
@@ -727,11 +589,8 @@ dri_ttm_fence_unreference(dri_fence *fence)
     if (!fence)
 	return;
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_unreference: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
+    DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+
     if (--fence_ttm->refcount == 0) {
 	int ret;
 
@@ -741,11 +600,9 @@ dri_ttm_fence_unreference(dri_fence *fence)
 		    fence_ttm->name, strerror(-ret));
 	}
 
-	_glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 	free(fence);
 	return;
     }
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
 }
 
 static void
@@ -755,19 +612,14 @@ dri_ttm_fence_wait(dri_fence *fence)
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
     int ret;
 
-    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
     ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
-    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
     if (ret != 0) {
 	_mesa_printf("%s:%d: Error %d waiting for fence %s.\n",
 		     __FILE__, __LINE__, ret, fence_ttm->name);
 	abort();
     }
 
-#if BUFMGR_DEBUG
-    fprintf(stderr, "fence_wait: %p (%s)\n", &fence_ttm->fence,
-	    fence_ttm->name);
-#endif
+    DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
 }
 
 static void
@@ -775,52 +627,56 @@ dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
 {
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
 
-    intel_bo_free_list(&bufmgr_ttm->list);
-    intel_bo_free_list(&bufmgr_ttm->reloc_list);
+    intel_free_validate_list(bufmgr_ttm);
 
-    _glthread_DESTROY_MUTEX(bufmgr_ttm->mutex);
     free(bufmgr);
 }
 
-
-static void
-intel_dribo_destroy_callback(void *priv)
-{
-    dri_bo *dribo = priv;
-
-    if (dribo)
-	dri_bo_unreference(dribo);
-}
-
+/**
+ * Adds the target buffer to the validation list and adds the relocation
+ * to the reloc_buffer's relocation list.
+ *
+ * The relocation entry at the given offset must already contain the
+ * precomputed relocation value, because the kernel will optimize out
+ * the relocation entry write when the buffer hasn't moved from the
+ * last known offset in target_buf.
+ */
 static void
 dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
 		   GLuint offset, dri_bo *target_buf)
 {
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)reloc_buf;
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
-    int newItem;
-    struct intel_reloc_info reloc;
+    dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
+    struct intel_bo_node *node;
+    int index;
     int mask;
-    int ret;
+    int num_relocs;
+    uint32_t *this_reloc;
 
     mask = DRM_BO_MASK_MEM;
     mask |= flags & (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_EXE);
 
-    ret = intel_add_validate_buffer(bufmgr_ttm, target_buf, flags, mask,
-				    &newItem, intel_dribo_destroy_callback);
-    if (ret < 0)
-	return;
+    node = intel_add_validate_buffer(bufmgr_ttm, target_buf, flags, mask,
+				     &index);
+
+    intel_setup_reloc_list(reloc_buf);
 
-    if (ret == 1)
-	dri_bo_reference(target_buf);
+    num_relocs = (reloc_buf_ttm->relocs[0] & 0xffff);
 
-    reloc.type = I915_RELOC_TYPE_0;
-    reloc.reloc = offset;
-    reloc.delta = delta;
-    reloc.index = newItem;
-    reloc.handle = ttm_buf->drm_bo.handle;
+    /* Check overflow */
+    assert((reloc_buf_ttm->relocs[0] & 0xffff) < bufmgr_ttm->max_relocs);
 
-    intel_add_validate_reloc(bufmgr_ttm, &reloc);
+    this_reloc = reloc_buf_ttm->relocs + I915_RELOC_HEADER +
+	num_relocs * I915_RELOC0_STRIDE;
+
+    this_reloc[0] = offset;
+    this_reloc[1] = delta;
+    this_reloc[2] = index;
+    this_reloc[3] = 0;
+
+    reloc_buf_ttm->relocs[0]++; /* Increment relocation count */
+    /* Check wraparound */
+    assert((reloc_buf_ttm->relocs[0] & 0xffff) != 0);
 }
 
 
@@ -829,7 +685,7 @@ dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
 {
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
     void *ptr;
-    int itemLoc;
+    int index;
 
     /* Add the batch buffer to the validation list.  There are no relocations
      * pointing to it.
@@ -837,7 +693,7 @@ dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
     intel_add_validate_buffer(bufmgr_ttm, batch_buf,
 			      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
 			      DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
-			      &itemLoc, NULL);
+			      &index);
 
     ptr = intel_setup_validate_list(bufmgr_ttm, count);
 
@@ -857,7 +713,7 @@ intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
         node = DRMLISTENTRY(struct intel_bo_node, l, head);
 	arg = &node->bo_arg;
 	rep = &arg->d.rep;
-	((dri_bo *) node->priv)->offset = rep->bo_info.offset;
+	node->bo->offset = rep->bo_info.offset;
     }
 }
 
@@ -867,10 +723,11 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
     dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
 
     intel_update_buffer_offsets (bufmgr_ttm);
-    intel_free_validate_list(bufmgr_ttm);
-    intel_free_reloc_list(bufmgr_ttm);
 
-    intel_bo_free_list(&bufmgr_ttm->list);
+    if (bufmgr_ttm->bufmgr.debug)
+	dri_ttm_dump_validation_list(bufmgr_ttm);
+
+    intel_free_validate_list(bufmgr_ttm);
 }
 
 /**
@@ -892,13 +749,11 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
     bufmgr_ttm->fd = fd;
     bufmgr_ttm->fence_type = fence_type;
     bufmgr_ttm->fence_type_flush = fence_type_flush;
-    _glthread_INIT_MUTEX(bufmgr_ttm->mutex);
 
     /* lets go with one relocation per every four dwords - purely heuristic */
     bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 4;
 
-    intel_create_bo_list(10, &bufmgr_ttm->list, NULL);
-    intel_create_bo_list(1, &bufmgr_ttm->reloc_list, NULL);
+    intel_init_validate_list(&bufmgr_ttm->list);
 
     bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
     bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
@@ -913,6 +768,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
     bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
     bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
     bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
+    bufmgr_ttm->bufmgr.debug = GL_FALSE;
 
     return &bufmgr_ttm->bufmgr;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 2f49bf77ea..4fc4c96376 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -76,3 +76,9 @@
 				 devid == PCI_CHIP_Q33_G || \
 				 IS_965(devid))
 
+#define IS_945(devid)		(devid == PCI_CHIP_I945_G || \
+				 devid == PCI_CHIP_I945_GM || \
+				 devid == PCI_CHIP_I945_GME || \
+				 devid == PCI_CHIP_G33_G || \
+				 devid == PCI_CHIP_Q33_G || \
+				 devid == PCI_CHIP_Q35_G)
diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c
index a1a7ac9ffb..73f0fcd591 100644
--- a/src/mesa/drivers/dri/intel/intel_decode.c
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@@ -214,7 +214,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	instr_out(data, hw_offset, 2, "(%d,%d)\n",
 		  data[2] & 0xffff, data[2] >> 16);
 	instr_out(data, hw_offset, 3, "(%d,%d)\n",
-		  data[2] & 0xffff, data[2] >> 16);
+		  data[3] & 0xffff, data[3] >> 16);
 	instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
 	instr_out(data, hw_offset, 5, "color\n");
 	return len;
@@ -918,6 +918,26 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 
 	return len;
 
+    case 0x7900:
+	if (len != 4)
+	    fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+	if (count < 4)
+	    BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+		  data[1] & 0xffff,
+		  (data[1] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+		  data[2] & 0xffff,
+		  (data[2] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+		  (int)data[3] & 0xffff,
+		  ((int)data[3] >> 16) & 0xffff);
+
+	return len;
+
     case 0x7905:
 	if (len != 5)
 	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 034304f91c..8d75c63cef 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -493,7 +493,8 @@ intel_bind_framebuffer(GLcontext * ctx, GLenum target,
    if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
       intel_draw_buffer(ctx, fb);
       /* Integer depth range depends on depth buffer bits */
-      ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
+      if (ctx->Driver.DepthRange != NULL)
+	 ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
    }
    else {
       /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 0acf956a38..7637585033 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -28,6 +28,7 @@
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
+#include "intel_chipset.h"
 #include "enums.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -50,13 +51,15 @@ target_to_target(GLenum target)
 
 struct intel_mipmap_tree *
 intel_miptree_create(struct intel_context *intel,
-                     GLenum target,
-                     GLenum internal_format,
-                     GLuint first_level,
-                     GLuint last_level,
-                     GLuint width0,
-                     GLuint height0,
-                     GLuint depth0, GLuint cpp, GLuint compress_byte)
+		     GLenum target,
+		     GLenum internal_format,
+		     GLuint first_level,
+		     GLuint last_level,
+		     GLuint width0,
+		     GLuint height0,
+		     GLuint depth0,
+		     GLuint cpp,
+		     GLuint compress_byte)
 {
    GLboolean ok;
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
@@ -75,54 +78,19 @@ intel_miptree_create(struct intel_context *intel,
    mt->cpp = compress_byte ? compress_byte : cpp;
    mt->compressed = compress_byte ? 1 : 0;
    mt->refcount = 1; 
+   mt->pitch = 0;
 
-   switch (intel->intelScreen->deviceID) {
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q33_G:
-   case PCI_CHIP_Q35_G:
-      ok = i945_miptree_layout(mt);
-      break;
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
-   default:
-      /* All the i830 chips and the i915 use this layout:
-       */
-      ok = i915_miptree_layout(mt);
-      break;
-   }
+#ifdef I915
+   if (IS_945(intel->intelScreen->deviceID))
+      ok = i945_miptree_layout(intel, mt);
+   else
+      ok = i915_miptree_layout(intel, mt);
+#else
+   ok = brw_miptree_layout(intel, mt);
+#endif
 
    if (ok) {
-      if (!mt->compressed) {
-	 int align;
-
-	 if (intel->ttm) {
-	    /* XXX: Align pitch to multiple of 64 bytes for now to allow
-	     * render-to-texture to work in all cases. This should probably be
-	     * replaced at some point by some scheme to only do this when really
-	     * necessary.
-	     */
-	    align = 63;
-	 } else {
-	    align = 3;
-	 }
-
-	 mt->pitch = (mt->pitch * cpp + align) & ~align;
-
-	 /* XXX: At least the i915 seems very upset when the pitch is a multiple
-	  * of 1024 and sometimes 512 bytes - performance can drop by several
-	  * times. Go to the next multiple of the required alignment for now.
-	  */
-	 if (!(mt->pitch & 511))
-	    mt->pitch += align + 1;
-
-	 mt->pitch /= cpp;
-      }
+      assert (mt->pitch);
 
       mt->region = intel_region_alloc(intel,
                                       mt->cpp, mt->pitch, mt->total_height);
@@ -136,6 +104,52 @@ intel_miptree_create(struct intel_context *intel,
    return mt;
 }
 
+/**
+ * intel_miptree_pitch_align:
+ *
+ * @intel: intel context pointer
+ *
+ * @mt: the miptree to compute pitch alignment for
+ *
+ * @pitch: the natural pitch value
+ *
+ * Given @pitch, compute a larger value which accounts for
+ * any necessary alignment required by the device
+ */
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch)
+{
+   if (!mt->compressed) {
+      int pitch_align;
+
+      if (intel->ttm) {
+	 /* XXX: Align pitch to multiple of 64 bytes for now to allow
+	  * render-to-texture to work in all cases. This should probably be
+	  * replaced at some point by some scheme to only do this when really
+	  * necessary.
+	  */
+	 pitch_align = 64;
+      } else {
+	 pitch_align = 4;
+      }
+
+      pitch = ALIGN(pitch * mt->cpp, pitch_align);
+
+#ifdef I915
+      /* XXX: At least the i915 seems very upset when the pitch is a multiple
+       * of 1024 and sometimes 512 bytes - performance can drop by several
+       * times. Go to the next multiple of the required alignment for now.
+       */
+      if (!(pitch & 511))
+	 pitch += pitch_align;
+#endif
+
+      pitch /= mt->cpp;
+   }
+   return pitch;
+}
 
 void
 intel_miptree_reference(struct intel_mipmap_tree **dst,
@@ -207,11 +221,11 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
 
 void
 intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-                             GLuint level,
-                             GLuint nr_images,
-                             GLuint x, GLuint y, GLuint w, GLuint h, GLuint d)
+			     GLuint level,
+			     GLuint nr_images,
+			     GLuint x, GLuint y,
+			     GLuint w, GLuint h, GLuint d)
 {
-
    mt->level[level].width = w;
    mt->level[level].height = h;
    mt->level[level].depth = d;
@@ -238,7 +252,8 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
 
 void
 intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-                               GLuint level, GLuint img, GLuint x, GLuint y)
+			       GLuint level, GLuint img,
+			       GLuint x, GLuint y)
 {
    if (img == 0 && level == 0)
       assert(x == 0 && y == 0);
@@ -271,12 +286,12 @@ intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level)
 
 
 GLuint
-intel_miptree_image_offset(struct intel_mipmap_tree * mt,
-                           GLuint face, GLuint level)
+intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+			   GLuint face, GLuint level)
 {
    if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
       return (mt->level[level].level_offset +
-              mt->level[level].image_offset[face] * mt->cpp);
+	      mt->level[level].image_offset[face] * mt->cpp);
    else
       return mt->level[level].level_offset;
 }
@@ -323,11 +338,12 @@ intel_miptree_image_unmap(struct intel_context *intel,
  */
 void
 intel_miptree_image_data(struct intel_context *intel,
-                         struct intel_mipmap_tree *dst,
-                         GLuint face,
-                         GLuint level,
-                         void *src,
-                         GLuint src_row_pitch, GLuint src_image_pitch)
+			 struct intel_mipmap_tree *dst,
+			 GLuint face,
+			 GLuint level,
+			 void *src,
+			 GLuint src_row_pitch,
+			 GLuint src_image_pitch)
 {
    GLuint depth = dst->level[level].depth;
    GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
@@ -335,18 +351,19 @@ intel_miptree_image_data(struct intel_context *intel,
    GLuint i;
    GLuint height = 0;
 
-   DBG("%s\n", __FUNCTION__);
+   DBG("%s: %d/%d\n", __FUNCTION__, face, level);
    for (i = 0; i < depth; i++) {
       height = dst->level[level].height;
       if(dst->compressed)
 	 height /= 4;
-      intel_region_data(intel, dst->region,
-                        dst_offset + dst_depth_offset[i], /* dst_offset */
-                        0, 0,                             /* dstx, dsty */
-                        src,
-                        src_row_pitch,
-                        0, 0,                             /* source x, y */
-                        dst->level[level].width, height); /* width, height */
+      intel_region_data(intel,
+			dst->region,
+			dst_offset + dst_depth_offset[i], /* dst_offset */
+			0, 0,                             /* dstx, dsty */
+			src,
+			src_row_pitch,
+			0, 0,                             /* source x, y */
+			dst->level[level].width, height); /* width, height */
 
       src += src_image_pitch * dst->cpp;
    }
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index ecdb7be244..968eec4fec 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -123,6 +123,10 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLuint cpp,
                                                GLuint compress_byte);
 
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch);
+
 void intel_miptree_reference(struct intel_mipmap_tree **dst,
                              struct intel_mipmap_tree *src);
 
@@ -190,9 +194,11 @@ void intel_miptree_image_copy(struct intel_context *intel,
 
 /* i915_mipmap_tree.c:
  */
-GLboolean i915_miptree_layout(struct intel_mipmap_tree *mt);
-GLboolean i945_miptree_layout(struct intel_mipmap_tree *mt);
-
-
+GLboolean i915_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean i945_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
new file mode 100644
index 0000000000..9018e3daef
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -0,0 +1,120 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "enums.h"
+#include "state.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+
+
+/**
+ * Check if any fragment operations are in effect which might affect
+ * glDraw/CopyPixels.
+ */
+GLboolean
+intel_check_blit_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* XXX Note: Scissor could be done with the blitter:
+    */
+   return !(ctx->_ImageTransferState ||
+            ctx->Color.AlphaEnabled ||
+            ctx->Depth.Test ||
+            ctx->Fog.Enabled ||
+            ctx->Scissor.Enabled ||
+            ctx->Stencil.Enabled ||
+            !ctx->Color.ColorMask[0] ||
+            !ctx->Color.ColorMask[1] ||
+            !ctx->Color.ColorMask[2] ||
+            !ctx->Color.ColorMask[3] ||
+            ctx->Texture._EnabledUnits || 
+	    ctx->FragmentProgram._Enabled ||
+	    ctx->Color.BlendEnabled);
+}
+
+
+GLboolean
+intel_check_meta_tex_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Some of _ImageTransferState (scale, bias) could be done with
+    * fragment programs on i915.
+    */
+   return !(ctx->_ImageTransferState || ctx->Fog.Enabled ||     /* not done yet */
+            ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
+}
+
+/* The intel_region struct doesn't really do enough to capture the
+ * format of the pixels in the region.  For now this code assumes that
+ * the region is a display surface and hence is either ARGB8888 or
+ * RGB565.
+ * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd
+ * know the buffer's pixel format.
+ *
+ * \param format  as given to glDraw/ReadPixels
+ * \param type  as given to glDraw/ReadPixels
+ */
+GLboolean
+intel_check_blit_format(struct intel_region * region,
+                        GLenum format, GLenum type)
+{
+   if (region->cpp == 4 &&
+       (type == GL_UNSIGNED_INT_8_8_8_8_REV ||
+        type == GL_UNSIGNED_BYTE) && format == GL_BGRA) {
+      return GL_TRUE;
+   }
+
+   if (region->cpp == 2 &&
+       type == GL_UNSIGNED_SHORT_5_6_5_REV && format == GL_BGR) {
+      return GL_TRUE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n",
+              __FUNCTION__, region->cpp,
+              _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+
+   return GL_FALSE;
+}
+
+
+void
+intelInitPixelFuncs(struct dd_function_table *functions)
+{
+   functions->Accum = _swrast_Accum;
+   functions->Bitmap = _swrast_Bitmap;
+   functions->CopyPixels = intelCopyPixels;
+   functions->ReadPixels = intelReadPixels;
+   functions->DrawPixels = intelDrawPixels;
+}
diff --git a/src/mesa/drivers/dri/i915/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
index a6fcf90ce0..a6fcf90ce0 100644
--- a/src/mesa/drivers/dri/i915/intel_pixel.h
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
new file mode 100644
index 0000000000..3777422619
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -0,0 +1,357 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "colormac.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+
+
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+
+/* Unlike the other intel_pixel_* functions, the expectation here is
+ * that the incoming data is not in a PBO.  With the XY_TEXT blit
+ * method, there's no benefit having it in a PBO, but we could
+ * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
+ * PBO bitmaps.  I think they are probably pretty rare though - I
+ * wonder if Xgl uses them?
+ */
+static const GLubyte *map_pbo( GLcontext *ctx,
+			       GLsizei width, GLsizei height,
+			       const struct gl_pixelstore_attrib *unpack,
+			       const GLubyte *bitmap )
+{
+   GLubyte *buf;
+
+   if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+				  GL_COLOR_INDEX, GL_BITMAP,
+				  (GLvoid *) bitmap)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+      return NULL;
+   }
+
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+					   GL_READ_ONLY_ARB,
+					   unpack->BufferObj);
+   if (!buf) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+      return NULL;
+   }
+
+   return ADD_POINTERS(buf, bitmap);
+}
+
+static GLboolean test_bit( const GLubyte *src,
+			    GLuint bit )
+{
+   return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
+}
+
+static void set_bit( GLubyte *dest,
+			  GLuint bit )
+{
+   dest[bit/8] |= 1 << (bit % 8);
+}
+
+/* Extract a rectangle's worth of data from the bitmap.  Called
+ * per-cliprect.
+ */
+static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
+			      const struct gl_pixelstore_attrib *unpack,
+			      const GLubyte *bitmap,
+			      GLuint x, GLuint y, 
+			      GLuint w, GLuint h,
+			      GLubyte *dest,
+			      GLuint row_align,
+			      GLboolean invert)
+{
+   GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
+   GLuint mask = unpack->LsbFirst ? 0 : 7;
+   GLuint bit = 0;
+   GLint row, col;
+   GLint first, last;
+   GLint incr;
+   GLuint count = 0;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
+
+   if (invert) {
+      first = h-1;
+      last = 0;
+      incr = -1;
+   }
+   else {
+      first = 0;
+      last = h-1;
+      incr = 1;
+   }
+
+   /* Require that dest be pre-zero'd.
+    */
+   for (row = first; row != (last+incr); row += incr) {
+      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, 
+						    width, height, 
+						    GL_COLOR_INDEX, GL_BITMAP, 
+						    y + row, x);
+
+      for (col = 0; col < w; col++, bit++) {
+	 if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
+	    set_bit(dest, bit ^ 7);
+	    count++;
+	 }
+      }
+
+      if (row_align)
+	 bit = ALIGN(bit, row_align);
+   }
+
+   return count;
+}
+
+
+
+
+/*
+ * Render a bitmap.
+ */
+static GLboolean
+do_blit_bitmap( GLcontext *ctx, 
+		GLint dstx, GLint dsty,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   GLfloat tmpColor[4];
+
+   union {
+      GLuint ui;
+      GLubyte ub[4];
+   } color;
+
+   if (!dst)
+       return GL_FALSE;
+
+   if (unpack->BufferObj->Name) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+	 return GL_TRUE;	/* even though this is an error, we're done */
+   }
+
+   COPY_4V(tmpColor, ctx->Current.RasterColor);
+
+   if (NEED_SECONDARY_COLOR(ctx)) {
+       ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
+   }
+
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]);
+
+   /* Does zoom apply to bitmaps?
+    */
+   if (!intel_check_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || 
+       ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint srcx = 0, srcy = 0;
+      GLint orig_screen_x1, orig_screen_y2;
+      GLuint i;
+
+
+      orig_screen_x1 = dPriv->x + dstx;
+      orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
+
+      /* Do scissoring in GL coordinates:
+       */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->y + (dPriv->h - dsty - height);  
+      dstx = dPriv->x + dstx;
+
+      dest_rect.x1 = dstx < 0 ? 0 : dstx;
+      dest_rect.y1 = dsty < 0 ? 0 : dsty;
+      dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width;
+      dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height;
+
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+	 int box_w, box_h;
+	 GLint px, py;
+	 GLuint stipple[32];  
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+	 /* Now go back to GL coordinates to figure out what subset of
+	  * the bitmap we are uploading for this cliprect:
+	  */
+	 box_w = rect.x2 - rect.x1;
+	 box_h = rect.y2 - rect.y1;
+	 srcx = rect.x1 - orig_screen_x1;
+	 srcy = orig_screen_y2 - rect.y2;
+
+
+#define DY 32
+#define DX 32
+
+	 /* Then, finally, chop it all into chunks that can be
+	  * digested by hardware:
+	  */
+	 for (py = 0; py < box_h; py += DY) { 
+	    for (px = 0; px < box_w; px += DX) { 
+	       int h = MIN2(DY, box_h - py);
+	       int w = MIN2(DX, box_w - px); 
+	       GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
+	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
+		  ctx->Color.LogicOp : GL_COPY;
+
+	       assert(sz <= sizeof(stipple));
+	       memset(stipple, 0, sz);
+
+	       /* May need to adjust this when padding has been introduced in
+		* sz above:
+		*/
+	       if (get_bitmap_rect(width, height, unpack, 
+				   bitmap,
+				   srcx + px, srcy + py, w, h,
+				   (GLubyte *)stipple,
+				   8,
+				   GL_TRUE) == 0)
+		  continue;
+
+	       /* Emit the color-expand blit for this chunk:
+		*/
+	       intelEmitImmediateColorExpandBlit( intel,
+						  dst->cpp,
+						  (GLubyte *)stipple, 
+						  sz,
+						  color.ui,
+						  dst->pitch,
+						  dst->buffer,
+						  0,
+						  dst->tiled,
+						  rect.x1 + px,
+						  rect.y2 - (py + h),
+						  w, h,
+						  logic_op);
+	    } 
+	 } 
+      }
+      intel->need_flush = GL_TRUE;
+   out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+
+   if (unpack->BufferObj->Name) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+
+   return GL_TRUE;
+}
+
+
+
+
+
+/* There are a large number of possible ways to implement bitmap on
+ * this hardware, most of them have some sort of drawback.  Here are a
+ * few that spring to mind:
+ * 
+ * Blit:
+ *    - XY_MONO_SRC_BLT_CMD
+ *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
+ *    - XY_TEXT_BLT
+ *    - XY_TEXT_IMMEDIATE_BLT
+ *         - blit per cliprect, subject to maximum immediate data size.
+ *    - XY_COLOR_BLT 
+ *         - per pixel or run of pixels
+ *    - XY_PIXEL_BLT
+ *         - good for sparse bitmaps
+ *
+ * 3D engine:
+ *    - Point per pixel
+ *    - Translate bitmap to an alpha texture and render as a quad
+ *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
+ */
+void
+intelBitmap(GLcontext * ctx,
+	    GLint x, GLint y,
+	    GLsizei width, GLsizei height,
+	    const struct gl_pixelstore_attrib *unpack,
+	    const GLubyte * pixels)
+{
+   if (do_blit_bitmap(ctx, x, y, width, height,
+                          unpack, pixels))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
new file mode 100644
index 0000000000..c453097e55
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -0,0 +1,382 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "state.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_tris.h"
+#include "intel_pixel.h"
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+/* Select the region glCopyPixels should read from for <type>.  A NULL
+ * result tells the callers there is no usable source region. */
+static struct intel_region *
+copypix_src_region(struct intel_context *intel, GLenum type)
+{
+   switch (type) {
+   case GL_COLOR:
+      return intel_readbuf_region(intel);
+   case GL_DEPTH:
+      /* Don't think this is really possible except at 16bpp, when we
+       * have no stencil.
+       */
+      if (intel->depth_region && intel->depth_region->cpp == 2)
+         return intel->depth_region;
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      /* Does it matter whether it is stencil/depth or depth/stencil? */
+      return intel->depth_region;
+   case GL_STENCIL:
+      /* Don't think this is really possible. */
+   default:
+      break;
+   }
+   return NULL;
+}
+
+
+/**
+ * Check if any fragment operations are in effect which might affect
+ * glCopyPixels.  Differs from intel_check_blit_fragment_ops in that
+ * we allow Scissor.  A true result means none of them is active.
+ */
+static GLboolean
+intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Could do logicop with the blitter: 
+    */
+   return (!ctx->_ImageTransferState &&
+           !ctx->Color.AlphaEnabled &&
+           !ctx->Depth.Test &&
+           !ctx->Fog.Enabled &&
+           !ctx->Stencil.Enabled &&
+           ctx->Color.ColorMask[0] &&
+           ctx->Color.ColorMask[1] &&
+           ctx->Color.ColorMask[2] &&
+           ctx->Color.ColorMask[3] &&
+           !ctx->Texture._EnabledUnits &&
+           !ctx->FragmentProgram._Enabled &&
+           !ctx->Color.BlendEnabled);
+}
+
+/* Doesn't work for overlapping regions.  Could do a double copy or
+ * just fallback.  Returns GL_FALSE if the caller must use another path.
+ */
+static GLboolean
+do_texture_copypixels(GLcontext * ctx,
+                      GLint srcx, GLint srcy,
+                      GLsizei width, GLsizei height,
+                      GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+   GLenum src_format;
+   GLenum src_type;
+
+   DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__,
+       srcx, srcy, width, height, dstx, dsty);
+
+   if (!src || !dst || type != GL_COLOR)
+      return GL_FALSE;
+
+   /* Can't handle overlapping regions.  Don't have sufficient control
+    * over rasterization to pull it off in-place.  Punt on these for
+    * now.  XXX: do a copy to a temporary.
+    */
+   if (src->buffer == dst->buffer) {
+      drm_clip_rect_t srcbox;
+      drm_clip_rect_t dstbox;
+      drm_clip_rect_t tmp;
+
+      srcbox.x1 = srcx;
+      srcbox.y1 = srcy;
+      srcbox.x2 = srcx + width;
+      srcbox.y2 = srcy + height;
+
+      dstbox.x1 = dstx;
+      dstbox.y1 = dsty;
+      dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
+      dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
+
+      DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
+      DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
+          width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+
+      if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
+         DBG("%s: regions overlap\n", __FUNCTION__);
+         return GL_FALSE;
+      }
+   }
+
+   intelFlush(&intel->ctx);
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   if (src->cpp == 2) {
+      src_format = GL_RGB;
+      src_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+   else {
+      src_format = GL_BGRA;
+      src_type = GL_UNSIGNED_BYTE;
+   }
+
+   /* Set the frontbuffer up as a large rectangular texture.
+    */
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
+                                         src->pitch,
+                                         src->height, src_format, src_type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+      srcy = dPriv->h - srcy - height;  /* convert from gl to hardware coords */
+
+      srcx += dPriv->x;
+      srcy += dPriv->y;
+
+      /* Clip against the source region.  This is the only source
+       * clipping we do.  XXX: Just set the texcoord wrap mode to clamp
+       * or similar.
+       */
+      if (0) {
+         GLint orig_x = srcx;
+         GLint orig_y = srcy;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx += srcx - orig_x;
+         dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
+      }
+
+      /* Just use the regular cliprect mechanism...  Does this need to
+       * even hold the lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+                                 dstx,
+                                 dstx + width * ctx->Pixel.ZoomX,
+                                 dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
+                                 dPriv->h - (dsty), 0, /* XXX: what z value? */
+                                 0x00ff00ff,
+                                 srcx, srcx + width, srcy, srcy + height);
+
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   else {
+      /* Nothing to draw without cliprects, but the meta state installed
+       * above must still be left, just as on the failure path above. */
+      intel->vtbl.leave_meta_state(intel);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+
+
+
+/**
+ * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
+ * Returns GL_FALSE to request another path, GL_TRUE once the lock was taken.
+ */
+static GLboolean
+do_blit_copypixels(GLcontext * ctx,
+                   GLint srcx, GLint srcy,
+                   GLsizei width, GLsizei height,
+                   GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+
+   /* Copypixels can be more than a straight copy.  Ensure all the
+    * extra operations are disabled:
+    */
+   if (!intel_check_copypixel_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   if (!src || !dst)
+      return GL_FALSE;
+
+   /* Both regions exist and no extra fragment ops apply: emit the blits. */
+   intelFlush(&intel->ctx);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint delta_x = 0;
+      GLint delta_y = 0;
+      GLuint i;
+
+      /* Do scissoring in GL coordinates; the source origin is moved by
+       * the same amount as the clipped destination origin. */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+	 GLint dx = dstx - srcx;
+         GLint dy = dsty - srcy;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+	 
+         srcx = dstx - dx;
+         srcy = dsty - dy;
+      }
+
+      /* Convert from GL to hardware coordinates: flip y against the
+       * drawable height, then add the drawable's origin. */
+      dsty = dPriv->h - dsty - height;  
+      srcy = dPriv->h - srcy - height;  
+      dstx += dPriv->x;
+      dsty += dPriv->y;
+      srcx += dPriv->x;
+      srcy += dPriv->y;
+
+      /* Clip against the source region.  This is the only source
+       * clipping we do.  Dst is clipped with cliprects below.
+       */
+      {
+         delta_x = srcx - dstx;
+         delta_y = srcy - dsty;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx = srcx - delta_x;
+         dsty = srcy - delta_y;
+      }
+
+      dest_rect.x1 = dstx;
+      dest_rect.y1 = dsty;
+      dest_rect.x2 = dstx + width;
+      dest_rect.y2 = dsty + height;
+
+      /* Could do slightly more clipping: Eg, take the intersection of
+       * the existing set of cliprects and those cliprects translated
+       * by delta_x, delta_y:
+       * 
+       * This code will not overwrite other windows, but will
+       * introduce garbage when copying from obscured window regions.
+       */
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+         /* delta_x/delta_y translate a destination pixel to its source. */
+         intelEmitCopyBlit(intel, dst->cpp, 
+			   src->pitch, src->buffer, 0, src->tiled,
+			   dst->pitch, dst->buffer, 0, dst->tiled,
+			   rect.x1 + delta_x, 
+			   rect.y1 + delta_y,       /* srcx, srcy */
+                           rect.x1, rect.y1,    /* dstx, dsty */
+                           rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   ctx->Color.ColorLogicOpEnabled ?
+			   ctx->Color.LogicOp : GL_COPY);
+      }
+
+    out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+/* glCopyPixels entry point: offer the request to the blitter, then to
+ * the texture engine, and leave it to swrast if both return GL_FALSE.
+ */
+void
+intelCopyPixels(GLcontext * ctx,
+                GLint srcx, GLint srcy,
+                GLsizei width, GLsizei height,
+                GLint destx, GLint desty, GLenum type)
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type) &&
+       !do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) {
+      DBG("fallback to _swrast_CopyPixels\n");
+
+      _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
new file mode 100644
index 0000000000..566f884be0
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -0,0 +1,386 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_buffer_objects.h"
+#include "intel_tris.h"
+
+
+
+/* DrawPixels from a PBO through the 3D engine; GL_FALSE asks for a fallback. */
+static GLboolean
+do_texture_drawpixels(GLcontext * ctx,
+                      GLint x, GLint y,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *unpack,
+                      const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint rowLength = unpack->RowLength ? unpack->RowLength : width;
+   GLuint src_offset;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   intelFlush(&intel->ctx);
+   intel->vtbl.render_start(intel);
+   intel->vtbl.emit_state(intel);
+
+   if (!dst)
+      return GL_FALSE;
+
+   if (src) {
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now. */
+      return GL_FALSE;
+   }
+
+   /* There are a couple of things we can't do yet, one of which is
+    * set the correct state for pixel operations when GL texturing is
+    * enabled.  That's a pretty rare state and probably not worth the
+    * effort.  A completely device-independent version of this may do
+    * more.
+    *
+    * Similarly, we make no attempt to merge metaops processing with
+    * an enabled fragment program, though it would certainly be
+    * possible.
+    */
+   if (!intel_check_meta_tex_fragment_ops(ctx)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for metaops texture\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   /* Setup the pbo up as a rectangular texture, if possible.
+    *
+    * TODO: This is almost always possible if the i915 fragment
+    * program is adjusted to correctly swizzle the sampled colors.
+    * The major exception is any 24bit texture, like RGB888, for which
+    * there is no hardware support.  
+    */
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset,
+                                         rowLength, height, format, type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      GLint srcx, srcy;
+      GLint dstx, dsty;
+
+      dstx = x;
+      dsty = dPriv->h - (y + height);
+
+      srcx = 0;                 /* skiprows/pixels already done */
+      srcy = 0;
+
+      if (0) {
+         const GLint orig_x = dstx;
+         const GLint orig_y = dsty;
+
+         if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height,
+                                   &dstx, &dsty, &width, &height))
+            goto out;
+
+         srcx += dstx - orig_x;
+         srcy += dsty - orig_y;
+      }
+
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height);
+
+      /* Must use the regular cliprect mechanism in order to get the
+       * drawing origin set correctly.  Otherwise scissor state is in
+       * incorrect coordinate space.  Does this even need to hold the
+       * lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+                                 dstx, dstx + width * ctx->Pixel.ZoomX,
+                                 dPriv->h - (y + height * ctx->Pixel.ZoomY),
+                                 dPriv->h - (y),
+                                 -ctx->Current.RasterPos[2] * .5,
+                                 0x00ff00ff,
+                                 srcx, srcx + width, srcy + height, srcy);
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   else {
+      /* Nothing to draw without cliprects, but the meta state installed
+       * above must still be left, just as on the failure path above. */
+      intel->vtbl.leave_meta_state(intel);
+   }
+   UNLOCK_HARDWARE(intel);
+   return GL_TRUE;
+}
+
+
+
+
+
+/* Pros:  
+ *   - no waiting for idle before updating framebuffer.
+ *   
+ * Cons:
+ *   - if upload is by memcpy, this may actually be slower than fallback path.
+ *   - uploads the whole image even if destination is clipped
+ *   
+ * Need to benchmark.
+ *
+ * Given the questions about performance, implement for pbo's only.
+ * This path is definitely a win if the pbo is already in agp.  If it
+ * turns out otherwise, we can add the code necessary to upload client
+ * data to agp space before performing the blit.  (Though it may turn
+ * out to be better/simpler just to use the texture engine).
+ */
+static GLboolean
+do_blit_drawpixels(GLcontext * ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dest = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset;
+   GLuint rowLength;
+   dri_fence *fence = NULL;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   /* Each GL_FALSE return below lets intelDrawPixels try its next path. */
+   if (!dest) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - no dest\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (src) {
+      /* This validation should be done by core mesa:
+       */
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now:
+       */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - not PBO\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_format(dest, format, type)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_fragment_ops(ctx)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for blitter\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (ctx->Pixel.ZoomX != 1.0F) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* rowLength is later passed to intelEmitCopyBlit right after the cpp. */
+   if (unpack->RowLength > 0)
+      rowLength = unpack->RowLength;
+   else
+      rowLength = width;
+
+   if (ctx->Pixel.ZoomY == -1.0F) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;          /* later: the y adjustment below is unreachable until then */
+      y -= height;
+   }
+   else if (ctx->Pixel.ZoomY == 1.0F) {
+      rowLength = -rowLength;
+   }
+   else {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t rect;
+      drm_clip_rect_t dest_rect;
+      dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
+      int i;
+
+      dest_rect.x1 = dPriv->x + x;
+      dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
+      dest_rect.x2 = dest_rect.x1 + width;
+      dest_rect.y2 = dest_rect.y1 + height;
+
+      for (i = 0; i < nbox; i++) {
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+         intelEmitCopyBlit(intel,
+                           dest->cpp,
+                           rowLength, src_buffer, src_offset, GL_FALSE,
+                           dest->pitch, dest->buffer, 0, dest->tiled,
+                           rect.x1 - dest_rect.x1,
+                           rect.y2 - dest_rect.y2,
+                           rect.x1,
+                           rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   ctx->Color.ColorLogicOpEnabled ?
+			   ctx->Color.LogicOp : GL_COPY);
+      }
+      intel_batchbuffer_flush(intel->batch);
+      fence = intel->batch->last_fence;
+      dri_fence_reference(fence);
+   }
+   UNLOCK_HARDWARE(intel);
+   /* Wait on the batch's last fence (taken after the flush) outside the lock. */
+   if (fence) {
+      dri_fence_wait(fence);
+      dri_fence_unreference(fence);
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s - DONE\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+
+
+void
+intelDrawPixels(GLcontext * ctx,
+                GLint x, GLint y,
+                GLsizei width, GLsizei height,
+                GLenum format,
+                GLenum type,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLvoid * pixels)
+{
+   struct gl_fragment_program *fpSave;
+   GLboolean hide_texenv;
+
+   /* Accelerated paths first; GL_TRUE means no further work is needed. */
+   if (do_blit_drawpixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels) ||
+       do_texture_drawpixels(ctx, x, y, width, height, format, type,
+                             unpack, pixels))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   /* We don't want the i915 texenv program to be applied to DrawPixels.
+    * This is really just a performance optimization (mesa will otherwise
+    * happily run the fragment program on each pixel in the image).
+    *
+    * Can't just set the current frag prog to 0 here, as on buffer resize
+    * we'll get new state checks which will segfault.  Remains a hack.
+    */
+   fpSave = ctx->FragmentProgram._Current;
+   hide_texenv = (fpSave == ctx->FragmentProgram._TexEnvProgram);
+   if (hide_texenv) {
+      ctx->FragmentProgram._Current = NULL;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE;
+      ctx->FragmentProgram._Active = GL_FALSE;
+   }
+
+   _swrast_DrawPixels(ctx, x, y, width, height, format, type, unpack, pixels);
+   if (hide_texenv) {
+      ctx->FragmentProgram._Current = fpSave;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
+      ctx->FragmentProgram._Active = GL_TRUE;
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 9e885c3b3b..37629c07e2 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -61,6 +61,9 @@
 
 #define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)
 
+#define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
+# define XY_TEXT_BYTE_PACKED		(1 << 16)
+
 /* BR00 */
 #define XY_BLT_WRITE_ALPHA	(1 << 21)
 #define XY_BLT_WRITE_RGB	(1 << 20)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index a47b288090..a5de01a3a8 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -49,23 +49,6 @@
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
 
-void
-intel_region_idle(struct intel_context *intel, struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   /* XXX: Using this function is likely bogus -- it ought to only have been
-    * used before a map, anyway, but leave this cheap implementation of it
-    * for now.
-    */
-   if (region && region->buffer) {
-      /* Mapping it for read will ensure that any acceleration to the region
-       * would have landed already.
-       */
-      dri_bo_map(region->buffer, GL_TRUE);
-      dri_bo_unmap(region->buffer);
-   }
-}
-
 /* XXX: Thread safety?
  */
 GLubyte *
@@ -195,6 +178,8 @@ intel_region_data(struct intel_context *intel,
                   const void *src, GLuint src_pitch,
                   GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
+   GLboolean locked = GL_FALSE;
+
    DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
@@ -208,8 +193,10 @@ intel_region_data(struct intel_context *intel,
          intel_region_cow(intel, dst);
    }
 
-
-   LOCK_HARDWARE(intel);
+   if (!intel->locked) {
+      LOCK_HARDWARE(intel);
+      locked = GL_TRUE;
+   }
 
    _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
                    dst->cpp,
@@ -218,7 +205,8 @@ intel_region_data(struct intel_context *intel,
 
    intel_region_unmap(intel, dst);
 
-   UNLOCK_HARDWARE(intel);
+   if (locked)
+      UNLOCK_HARDWARE(intel);
 
 }
 
@@ -459,6 +447,7 @@ intel_recreate_static_regions(struct intel_context *intel)
 			    &intelScreen->back,
 			    DRM_BO_FLAG_MEM_TT);
 
+#ifdef I915
    if (intelScreen->third.handle) {
       intel->third_region =
 	 intel_recreate_static(intel, "third",
@@ -466,6 +455,7 @@ intel_recreate_static_regions(struct intel_context *intel)
 			       &intelScreen->third,
 			       DRM_BO_FLAG_MEM_TT);
    }
+#endif /* I915 */
 
    /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
     * private buffers.
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
index 1975d729e4..b6a3b5a739 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -73,9 +73,6 @@ void intel_region_release(struct intel_region **ib);
 
 void intel_recreate_static_regions(struct intel_context *intel);
 
-void intel_region_idle(struct intel_context *intel,
-		       struct intel_region *ib);
-
 /* Map/unmap regions.  This is refcounted also: 
  */
 GLubyte *intel_region_map(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index e1f62bd70e..cd72a4b122 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -41,10 +41,11 @@
 #include "intel_buffers.h"
 #include "intel_tex.h"
 #include "intel_span.h"
-#include "intel_tris.h"
 #include "intel_ioctl.h"
 #include "intel_fbo.h"
+#include "intel_chipset.h"
 
+#include "i915_drm.h"
 #include "i830_dri.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
@@ -571,9 +572,9 @@ extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis,
 extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
                                    __DRIcontextPrivate * driContextPriv,
                                    void *sharedContextPrivate);
-
-
-
+extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis,
+				  __DRIcontextPrivate * driContextPriv,
+				  void *sharedContextPrivate);
 
 static GLboolean
 intelCreateContext(const __GLcontextModes * mesaVis,
@@ -583,29 +584,21 @@ intelCreateContext(const __GLcontextModes * mesaVis,
    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
    intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
 
-   switch (intelScreen->deviceID) {
-      /* Don't deal with i830 until texture work complete:
-       */
-   case PCI_CHIP_845_G:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
+#ifdef I915
+   if (IS_9XX(intelScreen->deviceID)) {
+      if (!IS_965(intelScreen->deviceID)) {
+	 return i915CreateContext(mesaVis, driContextPriv,
+				  sharedContextPrivate);
+      }
+   } else {
       return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
-
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q35_G:
-   case PCI_CHIP_Q33_G:
-      return i915CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
-
-   default:
-      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
-      return GL_FALSE;
    }
+#else
+   if (IS_965(intelScreen->deviceID))
+      return brwCreateContext(mesaVis, driContextPriv, sharedContextPrivate);
+#endif
+   fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+   return GL_FALSE;
 }
 
 
@@ -625,7 +618,9 @@ static const struct __DriverAPIRec intelAPI = {
    .WaitForSBC = NULL,
    .SwapBuffersMSC = NULL,
    .CopySubBuffer = intelCopySubBuffer,
+#ifdef I915
    .setTexOffset = intelSetTexOffset,
+#endif
 };
 
 
@@ -723,7 +718,11 @@ intelFillInModes(unsigned pixel_bits, unsigned depth_bits,
  */
 PUBLIC __GLcontextModes *__driDriverInitScreen(__DRIscreenPrivate *psp)
 {
+#ifdef I915
    static const __DRIversion ddx_expected = { 1, 5, 0 };
+#else
+   static const __DRIversion ddx_expected = { 1, 6, 0 };
+#endif
    static const __DRIversion dri_expected = { 4, 0, 0 };
    static const __DRIversion drm_expected = { 1, 5, 0 };
    I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index b08dee43bc..f016b6b4dc 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -1,4 +1,6 @@
+#include "swrast/swrast.h"
 #include "texobj.h"
+#include "mipmap.h"
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
@@ -155,6 +157,46 @@ timed_memcpy(void *dest, const void *src, size_t n)
 }
 #endif /* DO_DEBUG */
 
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level) via _mesa_generate_mipmap(), then record each resulting image's
+ * level and face in our private intel_texture_image data.
+ *
+ * The texture object's miptree must be mapped.
+ *
+ * It would be really nice if this was just called by Mesa whenever mipmaps
+ * needed to be regenerated, rather than us having to remember to do so in
+ * each texture image modification path.
+ * This function should also include an accelerated path.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+                      const struct gl_texture_unit *texUnit,
+                      struct gl_texture_object *texObj)
+{
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   int face, i;
+
+   _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+
+   /* The new images did not go through a normal TexImage path, so fill in
+    * their level/face here.  MaxLevel is an inclusive bound; also stay inside
+    * the Image[] array in case every level up to the limit is populated.
+    */
+   for (face = 0; face < nr_faces; face++) {
+      for (i = texObj->BaseLevel + 1;
+           i <= texObj->MaxLevel && i < MAX_TEXTURE_LEVELS; i++) {
+         struct intel_texture_image *intelImage =
+            intel_texture_image(texObj->Image[face][i]);
+         if (intelImage == NULL)
+            break;
+         intelImage->level = i;
+         intelImage->face = face;
+      }
+   }
+}
+
 
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
@@ -166,10 +208,17 @@ intelInitTextureFuncs(struct dd_function_table *functions)
    functions->TexSubImage1D = intelTexSubImage1D;
    functions->TexSubImage2D = intelTexSubImage2D;
    functions->TexSubImage3D = intelTexSubImage3D;
+#ifdef I915
    functions->CopyTexImage1D = intelCopyTexImage1D;
    functions->CopyTexImage2D = intelCopyTexImage2D;
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+#else
+   functions->CopyTexImage1D = _swrast_copy_teximage1d;
+   functions->CopyTexImage2D = _swrast_copy_teximage2d;
+   functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
+   functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
+#endif
    functions->GetTexImage = intelGetTexImage;
 
    /* compressed texture functions */
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index b77d7a1d8a..2973e0ceb9 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -148,4 +148,8 @@ void intel_tex_unmap_images(struct intel_context *intel,
 
 int intel_compressed_num_bytes(GLuint mesaFormat);
 
+void intel_generate_mipmap(GLcontext *ctx, GLenum target,
+			   const struct gl_texture_unit *texUnit,
+			   struct gl_texture_object *texObj);
+
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index f1a455a04c..521ce06640 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -29,6 +29,7 @@
 #include "enums.h"
 #include "image.h"
 #include "teximage.h"
+#include "mipmap.h"
 #include "swrast/swrast.h"
 
 #include "intel_screen.h"
@@ -85,12 +86,14 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat)
 
 static GLboolean
 do_copy_texsubimage(struct intel_context *intel,
+		    GLenum target,
                     struct intel_texture_image *intelImage,
                     GLenum internalFormat,
                     GLint dstx, GLint dsty,
                     GLint x, GLint y, GLsizei width, GLsizei height)
 {
    GLcontext *ctx = &intel->ctx;
+   struct gl_texture_object *texObj = intelImage->base.TexObject;
    const struct intel_region *src =
       get_teximage_source(intel, internalFormat);
 
@@ -156,16 +159,12 @@ do_copy_texsubimage(struct intel_context *intel,
 
    UNLOCK_HARDWARE(intel);
 
-#if 0
-   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
-    * XXX Add a ctx->Driver.GenerateMipmaps() function?
-    */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+   /* GL_SGIS_generate_mipmap */
+   if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
       intel_generate_mipmap(ctx, target,
                             &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
                             texObj);
    }
-#endif
 
    return GL_TRUE;
 }
@@ -197,7 +196,7 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
                           GL_RGBA, CHAN_TYPE, NULL,
                           &ctx->DefaultPacking, texObj, texImage);
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, 0, 0, x, y, width, 1))
       goto fail;
@@ -234,7 +233,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
                           &ctx->DefaultPacking, texObj, texImage);
 
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, 0, 0, x, y, width, height))
       goto fail;
@@ -264,7 +263,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
    /* Need to check texture is compatible with source format. 
     */
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat, xoffset, 0, x, y, width, 1)) {
       _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width);
@@ -290,7 +289,7 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
    /* Need to check texture is compatible with source format. 
     */
 
-   if (!do_copy_texsubimage(intel_context(ctx),
+   if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
                             internalFormat,
                             xoffset, yoffset, x, y, width, height)) {
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index fd800a7bb6..4f5f75d049 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -457,10 +457,6 @@ intelTexImage(GLcontext * ctx,
    if (!pixels)
       return;
 
-
-   if (intelImage->mt)
-      intel_region_idle(intel, intelImage->mt->region);
-
    LOCK_HARDWARE(intel);
 
    if (intelImage->mt) {
@@ -470,6 +466,7 @@ intelTexImage(GLcontext * ctx,
                                                intelImage->level,
                                                &dstRowStride,
                                                intelImage->base.ImageOffsets);
+      texImage->RowStride = dstRowStride / intelImage->mt->cpp;
    }
    else {
       /* Allocate regular memory and store the image there temporarily.   */
@@ -487,8 +484,8 @@ intelTexImage(GLcontext * ctx,
       texImage->Data = malloc(sizeInBytes);
    }
 
-   DBG("Upload image %dx%dx%d row_len %x "
-       "pitch %x\n",
+   DBG("Upload image %dx%dx%d row_len %d "
+       "pitch %d\n",
        width, height, depth, width * texelBytes, dstRowStride);
 
    /* Copy data.  Would like to know when it's ok for us to eg. use
@@ -508,6 +505,13 @@ intelTexImage(GLcontext * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
    }
 
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target,
+                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
+                            texObj);
+   }
+
    _mesa_unmap_teximage_pbo(ctx, unpack);
 
    if (intelImage->mt) {
@@ -516,16 +520,6 @@ intelTexImage(GLcontext * ctx,
    }
 
    UNLOCK_HARDWARE(intel);
-
-#if 0
-   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
-    */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      intel_generate_mipmap(ctx, target,
-                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
-                            texObj);
-   }
-#endif
 }
 
 void
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c
index 4da636021b..edc3a2eaa4 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c
@@ -52,7 +52,7 @@ GLuint intel_compressed_alignment(GLenum internalFormat)
     return alignment;
 }
 
-void i945_miptree_layout_2d( struct intel_mipmap_tree *mt )
+void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
 {
    GLint align_h = 2, align_w = 4;
    GLuint level;
@@ -92,7 +92,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt )
    /* Pitch must be a whole number of dwords, even though we
     * express it in texels.
     */
-   mt->pitch = ALIGN(mt->pitch * mt->cpp, 4) / mt->cpp;
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
    mt->total_height = 0;
 
    for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h
index 99d41c3629..193699d3f7 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_layout.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h
@@ -38,5 +38,5 @@ static GLuint minify( GLuint d )
    return MAX2(1, d>>1);
 }
 
-extern void i945_miptree_layout_2d( struct intel_mipmap_tree *mt );
+extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
 extern GLuint intel_compressed_alignment(GLenum);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 32de2cfb48..bd27b86bf3 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -64,9 +64,6 @@ intelTexSubimage(GLcontext * ctx,
    if (!pixels)
       return;
 
-   if (intelImage->mt)
-      intel_region_idle(intel, intelImage->mt->region);
-
    LOCK_HARDWARE(intel);
 
    /* Map buffer if necessary.  Need to lock to prevent other contexts
@@ -93,14 +90,12 @@ intelTexSubimage(GLcontext * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
    }
 
-#if 0
    /* GL_SGIS_generate_mipmap */
    if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      _mesa_generate_mipmap(ctx, target,
+      intel_generate_mipmap(ctx, target,
                             &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
                             texObj);
    }
-#endif
 
    _mesa_unmap_teximage_pbo(ctx, packing);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index af18c26d55..d260a721d9 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -40,6 +40,7 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
          firstLevel = lastLevel = tObj->BaseLevel;
       }
       else {
+#ifdef I915
          firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
          firstLevel = MAX2(firstLevel, tObj->BaseLevel);
          lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
@@ -47,6 +48,18 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
          lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
          lastLevel = MIN2(lastLevel, tObj->MaxLevel);
          lastLevel = MAX2(firstLevel, lastLevel);       /* need at least one level */
+#else
+	 /* Min/max LOD are not taken into account here: those values are
+	  * programmed as sampler state elsewhere, and we upload the same
+	  * mipmap levels regardless.  This may not be ideal, since it
+	  * means the app cannot use min/max LOD to reduce texture memory
+	  * pressure.
+	  */
+	 firstLevel = tObj->BaseLevel;
+	 lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
+			  tObj->MaxLevel);
+	 lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+#endif
       }
       break;
    case GL_TEXTURE_RECTANGLE_NV:
@@ -62,6 +75,10 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
    intelObj->lastLevel = lastLevel;
 }
 
+/**
+ * Copies the image's contents at its level into the object's miptree,
+ * and updates the image to point at the object's miptree.
+ */
 static void
 copy_image_data_to_tree(struct intel_context *intel,
                         struct intel_texture_object *intelObj,
@@ -211,8 +228,15 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
       }
    }
 
+#ifdef I915
+   /* XXX: what is this flush about?
+    * On 965, it causes a batch flush in the middle of the state relocation
+    * emits, which means that the eventual rendering doesn't have all of the
+    * required relocations in place.
+    */
    if (need_flush)
       intel_batchbuffer_flush(intel->batch);
+#endif
 
    return GL_TRUE;
 }
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
index 3080cea79f..c151f79915 100644
--- a/src/mesa/drivers/dri/mga/mgarender.c
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -144,7 +144,7 @@ static GLboolean mga_run_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index 2ad35d4390..78347d3cc2 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -488,7 +488,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index e9c9df1222..f2af532834 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -179,7 +179,7 @@ DRI_CONF_OPT_BEGIN_V(fp_optimization,enum,def,"0:1") \
         DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
-const char __driConfigOptions[] =
+PUBLIC const char __driConfigOptions[] =
 DRI_CONF_BEGIN
 	DRI_CONF_SECTION_PERFORMANCE
 		DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
@@ -652,6 +652,13 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       screen->chip_flags = RADEON_CHIPSET_TCL;
       break;
 
+   /* RV410 SE chips have half the pipes of regular RV410 */
+   case PCI_CHIP_RV410_5E4C:
+   case PCI_CHIP_RV410_5E4F:
+      screen->chip_family = CHIP_FAMILY_RV380;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
    case PCI_CHIP_RV410_564A:
    case PCI_CHIP_RV410_564B:
    case PCI_CHIP_RV410_564F:
@@ -660,9 +667,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    case PCI_CHIP_RV410_5E48:
    case PCI_CHIP_RV410_5E4A:
    case PCI_CHIP_RV410_5E4B:
-   case PCI_CHIP_RV410_5E4C:
    case PCI_CHIP_RV410_5E4D:
-   case PCI_CHIP_RV410_5E4F:
       screen->chip_family = CHIP_FAMILY_RV410;
       screen->chip_flags = RADEON_CHIPSET_TCL;
       break;
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 5ad044c262..0f4baf2e7d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -418,7 +418,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/s3v/s3v_render.c b/src/mesa/drivers/dri/s3v/s3v_render.c
index 6aaa94976e..6b86e0e604 100644
--- a/src/mesa/drivers/dri/s3v/s3v_render.c
+++ b/src/mesa/drivers/dri/s3v/s3v_render.c
@@ -170,7 +170,7 @@ static GLboolean s3v_run_render( GLcontext *ctx,
 
 	for (i = 0 ; i < VB->PrimitiveCount ; i++ )
 	{
-                GLuint prim = VB->Primitive[i].mode;
+		GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
 		GLuint start = VB->Primitive[i].start;
 		GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c
index 514434c427..f2f19984b1 100644
--- a/src/mesa/drivers/dri/savage/savagerender.c
+++ b/src/mesa/drivers/dri/savage/savagerender.c
@@ -198,7 +198,7 @@ static GLboolean savage_run_render( GLcontext *ctx,
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      GLuint prim = VB->Primitive[i].mode;
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c
index 387473ef52..d7e373d7dd 100644
--- a/src/mesa/drivers/dri/unichrome/via_render.c
+++ b/src/mesa/drivers/dri/unichrome/via_render.c
@@ -106,7 +106,7 @@ static GLboolean via_run_fastrender(GLcontext *ctx,
     tnl->clipspace.new_inputs |= VERT_BIT_POS;
 
     for (i = 0; i < VB->PrimitiveCount; ++i) {
-        GLuint mode = VB->Primitive[i].mode;
+        GLuint mode = _tnl_translate_prim(&VB->Primitive[i]);
         GLuint start = VB->Primitive[i].start;
         GLuint length = VB->Primitive[i].count;
         if (length)
diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c
index 0ad18b4300..8d6cebeb79 100644
--- a/src/mesa/drivers/dri/unichrome/via_screen.c
+++ b/src/mesa/drivers/dri/unichrome/via_screen.c
@@ -48,7 +48,7 @@
 
 #include "xmlpool.h"
 
-const char __driConfigOptions[] =
+PUBLIC const char __driConfigOptions[] =
 DRI_CONF_BEGIN
     DRI_CONF_SECTION_PERFORMANCE
         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h
index 099912aa15..d7f92c463e 100644
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@@ -272,10 +272,10 @@ do {									\
    (((CTX)->Light.Enabled &&						\
      (CTX)->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)	\
     || (CTX)->Fog.ColorSumEnabled					\
-    || ((CTX)->VertexProgram._Enabled &&				\
-        ((CTX)->VertexProgram.Current->Base.InputsRead & VERT_BIT_COLOR1)) \
-    || ((CTX)->FragmentProgram._Enabled &&				\
-        ((CTX)->FragmentProgram.Current->Base.InputsRead & FRAG_BIT_COL1)) \
+    || ((CTX)->VertexProgram._Current &&				\
+        ((CTX)->VertexProgram._Current->Base.InputsRead & VERT_BIT_COLOR1)) \
+    || ((CTX)->FragmentProgram._Current &&				\
+        ((CTX)->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL1)) \
    )
 
 
diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c
index d4990bb795..fb54d6184d 100644
--- a/src/mesa/main/depthstencil.c
+++ b/src/mesa/main/depthstencil.c
@@ -213,7 +213,7 @@ put_values_z24(GLcontext *ctx, struct gl_renderbuffer *z24rb, GLuint count,
                const void *values, const GLubyte *mask)
 {
    struct gl_renderbuffer *dsrb = z24rb->Wrapped;
-   const GLubyte *src = (const GLubyte *) values;
+   const GLuint *src = (const GLuint *) values;
    ASSERT(z24rb->DataType == GL_UNSIGNED_INT);
    ASSERT(dsrb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT);
    ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT);
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index a9260c847b..44357fbd6a 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -531,20 +531,19 @@ make_1d_mipmap(const struct gl_texture_format *format, GLint border,
 }
 
 
-/**
- * XXX need to use the tex image's row stride!
- */
 static void
 make_2d_mipmap(const struct gl_texture_format *format, GLint border,
-               GLint srcWidth, GLint srcHeight, const GLubyte *srcPtr,
-               GLint dstWidth, GLint dstHeight, GLubyte *dstPtr)
+               GLint srcWidth, GLint srcHeight,
+	       const GLubyte *srcPtr, GLint srcRowStride,
+               GLint dstWidth, GLint dstHeight,
+	       GLubyte *dstPtr, GLint dstRowStride)
 {
    const GLint bpt = format->TexelBytes;
    const GLint srcWidthNB = srcWidth - 2 * border;  /* sizes w/out border */
    const GLint dstWidthNB = dstWidth - 2 * border;
    const GLint dstHeightNB = dstHeight - 2 * border;
-   const GLint srcRowStride = bpt * srcWidth;
-   const GLint dstRowStride = bpt * dstWidth;
+   const GLint srcRowBytes = bpt * srcRowStride;
+   const GLint dstRowBytes = bpt * dstRowStride;
    const GLubyte *srcA, *srcB;
    GLubyte *dst;
    GLint row;
@@ -552,7 +551,7 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border,
    /* Compute src and dst pointers, skipping any border */
    srcA = srcPtr + border * ((srcWidth + 1) * bpt);
    if (srcHeight > 1) 
-      srcB = srcA + srcRowStride;
+      srcB = srcA + srcRowBytes;
    else
       srcB = srcA;
    dst = dstPtr + border * ((dstWidth + 1) * bpt);
@@ -560,9 +559,9 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border,
    for (row = 0; row < dstHeightNB; row++) {
       do_row(format, srcWidthNB, srcA, srcB,
              dstWidthNB, dst);
-      srcA += 2 * srcRowStride;
-      srcB += 2 * srcRowStride;
-      dst += dstRowStride;
+      srcA += 2 * srcRowBytes;
+      srcB += 2 * srcRowBytes;
+      dst += dstRowBytes;
    }
 
    /* This is ugly but probably won't be used much */
@@ -620,9 +619,9 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border,
 static void
 make_3d_mipmap(const struct gl_texture_format *format, GLint border,
                GLint srcWidth, GLint srcHeight, GLint srcDepth,
-               const GLubyte *srcPtr,
+               const GLubyte *srcPtr, GLint srcRowStride,
                GLint dstWidth, GLint dstHeight, GLint dstDepth,
-               GLubyte *dstPtr)
+               GLubyte *dstPtr, GLint dstRowStride)
 {
    const GLint bpt = format->TexelBytes;
    const GLint srcWidthNB = srcWidth - 2 * border;  /* sizes w/out border */
@@ -717,13 +716,13 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border,
    /* Luckily we can leverage the make_2d_mipmap() function here! */
    if (border > 0) {
       /* do front border image */
-      make_2d_mipmap(format, 1, srcWidth, srcHeight, srcPtr,
-                     dstWidth, dstHeight, dstPtr);
+      make_2d_mipmap(format, 1, srcWidth, srcHeight, srcPtr, srcRowStride,
+                     dstWidth, dstHeight, dstPtr, dstRowStride);
       /* do back border image */
       make_2d_mipmap(format, 1, srcWidth, srcHeight,
-                     srcPtr + bytesPerSrcImage * (srcDepth - 1),
+                     srcPtr + bytesPerSrcImage * (srcDepth - 1), srcRowStride,
                      dstWidth, dstHeight,
-                     dstPtr + bytesPerDstImage * (dstDepth - 1));
+                     dstPtr + bytesPerDstImage * (dstDepth - 1), dstRowStride);
       /* do four remaining border edges that span the image slices */
       if (srcDepth == dstDepth) {
          /* just copy border pixels from src to dst */
@@ -798,15 +797,16 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border,
 
 static void
 make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border,
-                     GLint srcWidth, const GLubyte *srcPtr,
-                     GLint dstWidth, GLint dstHeight, GLubyte *dstPtr)
+                     GLint srcWidth, const GLubyte *srcPtr, GLuint srcRowStride,
+                     GLint dstWidth, GLint dstHeight,
+		     GLubyte *dstPtr, GLuint dstRowStride )
 {
    const GLint bpt = format->TexelBytes;
    const GLint srcWidthNB = srcWidth - 2 * border;  /* sizes w/out border */
    const GLint dstWidthNB = dstWidth - 2 * border;
    const GLint dstHeightNB = dstHeight - 2 * border;
-   const GLint srcRowStride = bpt * srcWidth;
-   const GLint dstRowStride = bpt * dstWidth;
+   const GLint srcRowBytes = bpt * srcRowStride;
+   const GLint dstRowBytes = bpt * dstRowStride;
    const GLubyte *src;
    GLubyte *dst;
    GLint row;
@@ -818,8 +818,8 @@ make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border,
    for (row = 0; row < dstHeightNB; row++) {
       do_row(format, srcWidthNB, src, src,
              dstWidthNB, dst);
-      src += srcRowStride;
-      dst += dstRowStride;
+      src += srcRowBytes;
+      dst += dstRowBytes;
    }
 
    if (border) {
@@ -840,17 +840,18 @@ make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border,
  */
 static void
 make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border,
-                     GLint srcWidth, GLint srcHeight, const GLubyte *srcPtr,
+                     GLint srcWidth, GLint srcHeight,
+		     const GLubyte *srcPtr, GLint srcRowStride,
                      GLint dstWidth, GLint dstHeight, GLint dstDepth,
-                     GLubyte *dstPtr)
+                     GLubyte *dstPtr, GLint dstRowStride)
 {
    const GLint bpt = format->TexelBytes;
    const GLint srcWidthNB = srcWidth - 2 * border;  /* sizes w/out border */
    const GLint dstWidthNB = dstWidth - 2 * border;
    const GLint dstHeightNB = dstHeight - 2 * border;
    const GLint dstDepthNB = dstDepth - 2 * border;
-   const GLint srcRowStride = bpt * srcWidth;
-   const GLint dstRowStride = bpt * dstWidth;
+   const GLint srcRowBytes = bpt * srcRowStride;
+   const GLint dstRowBytes = bpt * dstRowStride;
    const GLubyte *srcA, *srcB;
    GLubyte *dst;
    GLint layer;
@@ -859,7 +860,7 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border,
    /* Compute src and dst pointers, skipping any border */
    srcA = srcPtr + border * ((srcWidth + 1) * bpt);
    if (srcHeight > 1) 
-      srcB = srcA + srcRowStride;
+      srcB = srcA + srcRowBytes;
    else
       srcB = srcA;
    dst = dstPtr + border * ((dstWidth + 1) * bpt);
@@ -868,9 +869,9 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border,
       for (row = 0; row < dstHeightNB; row++) {
          do_row(format, srcWidthNB, srcA, srcB,
                 dstWidthNB, dst);
-         srcA += 2 * srcRowStride;
-         srcB += 2 * srcRowStride;
-         dst += dstRowStride;
+         srcA += 2 * srcRowBytes;
+         srcB += 2 * srcRowBytes;
+         dst += dstRowBytes;
       }
 
       /* This is ugly but probably won't be used much */
@@ -1132,23 +1133,28 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target,
          case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
          case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
             make_2d_mipmap(convertFormat, border,
-                           srcWidth, srcHeight, srcData,
-                           dstWidth, dstHeight, dstData);
+                           srcWidth, srcHeight, srcData, srcImage->RowStride,
+                           dstWidth, dstHeight, dstData, dstImage->RowStride);
             break;
          case GL_TEXTURE_3D:
             make_3d_mipmap(convertFormat, border,
-                           srcWidth, srcHeight, srcDepth, srcData,
-                           dstWidth, dstHeight, dstDepth, dstData);
+                           srcWidth, srcHeight, srcDepth,
+			   srcData, srcImage->RowStride,
+                           dstWidth, dstHeight, dstDepth,
+			   dstData, dstImage->RowStride);
             break;
          case GL_TEXTURE_1D_ARRAY_EXT:
             make_1d_stack_mipmap(convertFormat, border,
-                                 srcWidth, srcData,
-                                 dstWidth, dstHeight, dstData);
+                                 srcWidth, srcData, srcImage->RowStride,
+                                 dstWidth, dstHeight,
+				 dstData, dstImage->RowStride);
             break;
          case GL_TEXTURE_2D_ARRAY_EXT:
             make_2d_stack_mipmap(convertFormat, border,
-                                 srcWidth, srcHeight, srcData,
-                                 dstWidth, dstHeight, dstDepth, dstData);
+                                 srcWidth, srcHeight,
+				 srcData, srcImage->RowStride,
+                                 dstWidth, dstHeight,
+				 dstDepth, dstData, dstImage->RowStride);
             break;
          case GL_TEXTURE_RECTANGLE_NV:
             /* no mipmaps, do nothing */
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 94b7094388..0da487ea04 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1365,7 +1365,7 @@ struct gl_texture_image
    GLboolean IsCompressed;	/**< GL_ARB_texture_compression */
    GLuint CompressedSize;	/**< GL_ARB_texture_compression */
 
-   GLuint RowStride;		/**< == Width unless IsClientData and padded */
+   GLuint RowStride;		/**< Padded width in units of texels */
    GLuint *ImageOffsets;        /**< if 3D texture: array [Depth] of offsets to
                                      each 2D slice in 'Data', in texels */
    GLvoid *Data;		/**< Image data, accessed via FetchTexel() */
diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
index a1e73ef125..82c2b857ff 100644
--- a/src/mesa/shader/shader_api.c
+++ b/src/mesa/shader/shader_api.c
@@ -369,6 +369,54 @@ copy_string(GLchar *dst, GLsizei maxLength, GLsizei *length, const GLchar *src)
 
 
 /**
+ * Return size (in floats) of the given GLSL type.
+ * The result is never 0, because _mesa_get_active_uniform() divides a
+ * uniform's float count by it; types not listed here count as one slot.
+ * See also _slang_sizeof_type_specifier().
+ */
+static GLint
+sizeof_glsl_type(GLenum type)
+{
+   switch (type) {
+   case GL_BOOL:
+   case GL_FLOAT:
+   case GL_INT:
+      return 1;
+   case GL_BOOL_VEC2:
+   case GL_FLOAT_VEC2:
+   case GL_INT_VEC2:
+      return 2;
+   case GL_BOOL_VEC3:
+   case GL_FLOAT_VEC3:
+   case GL_INT_VEC3:
+      return 3;
+   case GL_BOOL_VEC4:
+   case GL_FLOAT_VEC4:
+   case GL_INT_VEC4:
+      return 4;
+   case GL_FLOAT_MAT2x3:
+      return 6;
+   case GL_FLOAT_MAT2:    /* 2 rows of 4, actually */
+   case GL_FLOAT_MAT2x4:
+      return 8;
+   case GL_FLOAT_MAT3:    /* 3 rows of 4, actually */
+   case GL_FLOAT_MAT3x2:  /* 3 rows of 4, actually */
+   case GL_FLOAT_MAT3x4:
+   case GL_FLOAT_MAT4x3:
+      return 12;
+   case GL_FLOAT_MAT4:
+   case GL_FLOAT_MAT4x2:  /* 4 rows of 4, actually */
+      return 16;
+   default:
+      /* Unlisted types (presumably samplers) must not yield 0, since the
+       * caller divides by this value; 1 leaves the stored Size as-is.
+       */
+      return 1;
+   }
+}
+
+
+/**
  * Called via ctx->Driver.AttachShader()
  */
 void
@@ -665,13 +713,17 @@ _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index,
       if (shProg->Uniforms->Parameters[j].Type == PROGRAM_UNIFORM ||
           shProg->Uniforms->Parameters[j].Type == PROGRAM_SAMPLER) {
          if (ind == index) {
+            GLuint uSize = shProg->Uniforms->Parameters[j].Size;
+            GLenum uType = shProg->Uniforms->Parameters[j].DataType;
             /* found it */
             copy_string(nameOut, maxLength, length,
                         shProg->Uniforms->Parameters[j].Name);
-            if (size)
-               *size = shProg->Uniforms->Parameters[j].Size;
+            if (size) {
+               /* convert from floats to 'type' (eg: sizeof(mat4x4)=1) */
+               *size = uSize / sizeof_glsl_type(uType);
+            }
             if (type)
-               *type = shProg->Uniforms->Parameters[j].DataType;
+               *type = uType;
             return;
          }
          ind++;
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index 3bf9804bef..314931d467 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -117,7 +117,7 @@ _swrast_update_rasterflags( GLcontext *ctx )
 
 
 /**
- * Examine polycon culls tate to compute the _BackfaceCullSign field.
+ * Examine polygon cull state to compute the _BackfaceCullSign field.
  * _BackfaceCullSign will be 0 if no culling, -1 if culling back-faces,
  * and 1 if culling front-faces.  The Polygon FrontFace state also
  * factors in.
@@ -128,21 +128,17 @@ _swrast_update_polygon( GLcontext *ctx )
    GLfloat backface_sign;
 
    if (ctx->Polygon.CullFlag) {
-      backface_sign = 1.0;
       switch (ctx->Polygon.CullFaceMode) {
       case GL_BACK:
-	 if (ctx->Polygon.FrontFace == GL_CCW)
-	    backface_sign = -1.0;
+         backface_sign = -1.0;
 	 break;
       case GL_FRONT:
-	 if (ctx->Polygon.FrontFace != GL_CCW)
-	    backface_sign = -1.0;
+         backface_sign = 1.0;
 	 break;
       case GL_FRONT_AND_BACK:
          /* fallthrough */
       default:
 	 backface_sign = 0.0;
-	 break;
       }
    }
    else {
diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c
index 0cf425e1c6..969787381c 100644
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -842,7 +842,8 @@ _swrast_DrawPixels( GLcontext *ctx,
                                      format, type, pixels)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glDrawPixels(invalid PBO access)");
-         goto end;
+         RENDER_FINISH(swrast, ctx);
+	 return;
       }
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
                                               GL_READ_ONLY_ARB,
@@ -850,7 +851,8 @@ _swrast_DrawPixels( GLcontext *ctx,
       if (!buf) {
          /* buffer is already mapped - that's an error */
          _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels(PBO is mapped)");
-         goto end;
+         RENDER_FINISH(swrast, ctx);
+	 return;
       }
       pixels = ADD_POINTERS(buf, pixels);
    }
@@ -890,8 +892,6 @@ _swrast_DrawPixels( GLcontext *ctx,
       /* don't return yet, clean-up */
    }
 
-end:
-
    RENDER_FINISH(swrast,ctx);
 
    if (unpack->BufferObj->Name) {
diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c
index 916ddc1b97..8df15c8704 100644
--- a/src/mesa/swrast/s_readpix.c
+++ b/src/mesa/swrast/s_readpix.c
@@ -570,7 +570,8 @@ _swrast_ReadPixels( GLcontext *ctx,
    /* Do all needed clipping here, so that we can forget about it later */
    if (!_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
       /* The ReadPixels region is totally outside the window bounds */
-      goto end;
+      RENDER_FINISH(swrast, ctx);
+      return;
    }
 
    if (clippedPacking.BufferObj->Name) {
@@ -580,7 +581,8 @@ _swrast_ReadPixels( GLcontext *ctx,
                                      format, type, pixels)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glReadPixels(invalid PBO access)");
-         goto end;
+	 RENDER_FINISH(swrast, ctx);
+	 return;
       }
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
                                               GL_WRITE_ONLY_ARB,
@@ -588,7 +590,8 @@ _swrast_ReadPixels( GLcontext *ctx,
       if (!buf) {
          /* buffer is already mapped - that's an error */
          _mesa_error(ctx, GL_INVALID_OPERATION, "glReadPixels(PBO is mapped)");
-         goto end;
+	 RENDER_FINISH(swrast, ctx);
+	 return;
       }
       pixels = ADD_POINTERS(buf, pixels);
    }
@@ -629,8 +632,6 @@ _swrast_ReadPixels( GLcontext *ctx,
          /* don't return yet, clean-up */
    }
 
-
-end:
    RENDER_FINISH(swrast, ctx);
 
    if (clippedPacking.BufferObj->Name) {
diff --git a/src/mesa/swrast_setup/ss_triangle.c b/src/mesa/swrast_setup/ss_triangle.c
index 4817d23932..5a0f1f2429 100644
--- a/src/mesa/swrast_setup/ss_triangle.c
+++ b/src/mesa/swrast_setup/ss_triangle.c
@@ -294,10 +294,8 @@ void _swsetup_choose_trifuncs( GLcontext *ctx )
        ctx->Polygon.OffsetFill)
       ind |= SS_OFFSET_BIT;
 
-   /* Note: gl_FrontFacing lives in fragment input FOGC.Y at this time */
    if ((ctx->Light.Enabled && ctx->Light.Model.TwoSide) ||
-       (ctx->VertexProgram._Enabled && ctx->VertexProgram.TwoSideEnabled) ||
-       (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)))
+       (ctx->VertexProgram._Current && ctx->VertexProgram.TwoSideEnabled))
       ind |= SS_TWOSIDE_BIT;
 
    /* We piggyback the two-sided stencil front/back determination on the