20 files changed, 730 insertions, 1409 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index a594fb6cc4..9ad9f6a6c0 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -29,6 +29,7 @@
 #include "intel_ioctl.h"
 #include "intel_decode.h"
 #include "intel_reg.h"
+#include "intel_bufmgr.h"
 
 /* Relocations in kernel space:
  *    - pass dma buffer seperately
@@ -78,11 +79,17 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
       batch->buf = NULL;
    }
 
+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
    batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
-			     intel->maxBatchSize, 4096,
-			     DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
-   dri_bo_map(batch->buf, GL_TRUE);
-   batch->map = batch->buf->virtual;
+			     intel->maxBatchSize, 4096);
+   if (batch->buffer)
+      batch->map = batch->buffer;
+   else {
+      dri_bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;
+   }
    batch->size = intel->maxBatchSize;
    batch->ptr = batch->map;
    batch->dirty_state = ~0;
@@ -99,7 +106,6 @@ intel_batchbuffer_alloc(struct intel_context *intel)
    struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
 
    batch->intel = intel;
-   batch->last_fence = NULL;
    intel_batchbuffer_reset(batch);
 
    return batch;
@@ -108,14 +114,13 @@ intel_batchbuffer_alloc(struct intel_context *intel)
 void
 intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-   if (batch->last_fence) {
-      dri_fence_wait(batch->last_fence);
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = NULL;
-   }
-   if (batch->map) {
-      dri_bo_unmap(batch->buf);
-      batch->map = NULL;
+   if (batch->buffer)
+      free (batch->buffer);
+   else {
+      if (batch->map) {
+	 dri_bo_unmap(batch->buf);
+	 batch->map = NULL;
+      }
    }
    dri_bo_unreference(batch->buf);
    batch->buf = NULL;
@@ -131,11 +136,12 @@ do_flush_locked(struct intel_batchbuffer *batch,
 		GLuint used, GLboolean allow_unlock)
 {
    struct intel_context *intel = batch->intel;
-   void *start;
-   GLuint count;
+   int ret = 0;
 
-   dri_bo_unmap(batch->buf);
-   start = dri_process_relocs(batch->buf, &count);
+   if (batch->buffer)
+      dri_bo_subdata (batch->buf, 0, used, batch->buffer);
+   else
+      dri_bo_unmap(batch->buf);
 
    batch->map = NULL;
    batch->ptr = NULL;
@@ -148,21 +154,25 @@ do_flush_locked(struct intel_batchbuffer *batch,
    if (!(intel->numClipRects == 0 &&
 	 batch->cliprect_mode == LOOP_CLIPRECTS)) {
       if (intel->ttm == GL_TRUE) {
-	 intel_exec_ioctl(batch->intel,
-			  used,
-			  batch->cliprect_mode != LOOP_CLIPRECTS,
-			  allow_unlock,
-			  start, count, &batch->last_fence);
+	 struct drm_i915_gem_execbuffer *execbuf;
+
+	 execbuf = dri_process_relocs(batch->buf);
+	 ret = intel_exec_ioctl(batch->intel,
+				used,
+				batch->cliprect_mode != LOOP_CLIPRECTS,
+				allow_unlock,
+				execbuf);
       } else {
-	 intel_batch_ioctl(batch->intel,
-			   batch->buf->offset,
-			   used,
-			   batch->cliprect_mode != LOOP_CLIPRECTS,
-			   allow_unlock);
+	 dri_process_relocs(batch->buf);
+	 ret = intel_batch_ioctl(batch->intel,
+				 batch->buf->offset,
+				 used,
+				 batch->cliprect_mode != LOOP_CLIPRECTS,
+				 allow_unlock);
       }
    }
-      
-   dri_post_submit(batch->buf, &batch->last_fence);
+
+   dri_post_submit(batch->buf);
 
    if (intel->numClipRects == 0 &&
        batch->cliprect_mode == LOOP_CLIPRECTS) {
@@ -187,6 +197,10 @@ do_flush_locked(struct intel_batchbuffer *batch,
 	 intel->vtbl.debug_batch(intel);
    }
 
+   if (ret != 0) {
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
    intel->vtbl.new_batch(intel);
 }
 
@@ -204,21 +218,27 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    if (INTEL_DEBUG & DEBUG_BATCH)
       fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
 	      used);
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = 0;
-      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
-      used += 12;
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (!intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
    }
-   else {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
-      used += 8;
+
+   /* Pad with a no-op so the batch, once MI_BATCH_BUFFER_END is added, is a multiple of 2 DWORDs. */
+
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
    }
 
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* end-of-batch marker */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
    /* Workaround for recursive batchbuffer flushing: If the window is
     * moved, we can get into a case where we try to flush during a
     * flush.  What happens is that when we try to grab the lock for
@@ -230,6 +250,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
     * avoid that in the first place. */
    batch->ptr = batch->map;
 
+   if (intel->vtbl.finish_batch)
+      intel->vtbl.finish_batch(intel);
+
    /* TODO: Just pass the relocation list and dma buffer up to the
     * kernel.
     */
@@ -242,9 +265,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
       UNLOCK_HARDWARE(intel);
 
    if (INTEL_DEBUG & DEBUG_SYNC) {
+      int irq;
+
       fprintf(stderr, "waiting for idle\n");
-      if (batch->last_fence != NULL)
-	 dri_fence_wait(batch->last_fence);
+      LOCK_HARDWARE(intel);
+      irq = intelEmitIrqLocked(intel);
+      UNLOCK_HARDWARE(intel);
+      intelWaitIrq(intel, irq);
    }
 
    /* Reset the buffer:
@@ -252,25 +279,22 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    intel_batchbuffer_reset(batch);
 }
 
-void
-intel_batchbuffer_finish(struct intel_batchbuffer *batch)
-{
-   intel_batchbuffer_flush(batch);
-   if (batch->last_fence != NULL)
-      dri_fence_wait(batch->last_fence);
-}
-
 
 /*  This is the only way buffers get added to the validate list.
  */
 GLboolean
 intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                              dri_bo *buffer,
-                             GLuint flags, GLuint delta)
+                             uint32_t read_domains, uint32_t write_domain,
+			     uint32_t delta)
 {
    int ret;
 
-   ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer);
+   if (batch->ptr - batch->map > batch->buf->size)
+    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+		  batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain,
+			     delta, batch->ptr - batch->map, buffer);
 
    /*
     * Using the old buffer offset, write in what the right data would be, in case
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 2d636df2ce..d3c656c803 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -40,7 +40,8 @@ struct intel_batchbuffer
    struct intel_context *intel;
 
    dri_bo *buf;
-   dri_fence *last_fence;
+
+   GLubyte *buffer;
 
    GLubyte *map;
    GLubyte *ptr;
@@ -58,8 +59,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
 void intel_batchbuffer_free(struct intel_batchbuffer *batch);
 
 
-void intel_batchbuffer_finish(struct intel_batchbuffer *batch);
-
 void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
 			      const char *file, int line);
 
@@ -82,14 +81,16 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
 
 GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                                        dri_bo *buffer,
-                                       GLuint flags, GLuint offset);
+				       uint32_t read_domains,
+				       uint32_t write_domain,
+				       uint32_t offset);
 
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
  * be passed as structs rather than dwords, but that's a little bit of
  * work...
  */
-static INLINE GLuint
+static INLINE GLint
 intel_batchbuffer_space(struct intel_batchbuffer *batch)
 {
    return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
@@ -134,9 +135,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 
 #define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
 
-#define OUT_RELOC(buf, cliprect_mode, delta) do { 			\
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
    assert((delta) >= 0);						\
-   intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \
+   intel_batchbuffer_emit_reloc(intel->batch, buf,			\
+				read_domains, write_domain, delta);	\
 } while (0)
 
 #define ADVANCE_BATCH() do { } while(0)
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index d9dbbb2482..80d11a01b7 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -66,14 +66,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 
    intelScreen = intel->intelScreen;
 
-   if (intel->last_swap_fence) {
-      dri_fence_wait(intel->last_swap_fence);
-      dri_fence_unreference(intel->last_swap_fence);
-      intel->last_swap_fence = NULL;
-   }
-   intel->last_swap_fence = intel->first_swap_fence;
-   intel->first_swap_fence = NULL;
-
    /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
     * should work regardless.
     */
@@ -156,19 +148,26 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);
 
-	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
+	 OUT_RELOC(dst->buffer,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		   0);
 	 OUT_BATCH((src_y << 16) | src_x);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+	 OUT_RELOC(src->buffer,
+		   I915_GEM_DOMAIN_RENDER, 0,
+		   0);
 	 ADVANCE_BATCH();
       }
 
-      if (intel->first_swap_fence)
-	 dri_fence_unreference(intel->first_swap_fence);
+      /* Emit a flush so that, on systems where we don't have automatic flushing
+       * set (such as 965), the results all land on the screen in a timely
+       * fashion.
+       */
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      OUT_BATCH(MI_FLUSH);
+      ADVANCE_BATCH();
+
       intel_batchbuffer_flush(intel->batch);
-      intel->first_swap_fence = intel->batch->last_fence;
-      if (intel->first_swap_fence)
-	 dri_fence_reference(intel->first_swap_fence);
    }
 
    UNLOCK_HARDWARE(intel);
@@ -225,7 +224,9 @@ intelEmitFillBlit(struct intel_context *intel,
    OUT_BATCH(BR13 | dst_pitch);
    OUT_BATCH((y << 16) | x);
    OUT_BATCH(((y + h) << 16) | (x + w));
-   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_RELOC(dst_buffer,
+	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+	     dst_offset);
    OUT_BATCH(color);
    ADVANCE_BATCH();
 }
@@ -342,11 +343,13 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH(BR13 | dst_pitch);
       OUT_BATCH((dst_y << 16) | dst_x);
       OUT_BATCH((dst_y2 << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+      OUT_RELOC(dst_buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		dst_offset);
       OUT_BATCH((src_y << 16) | src_x);
       OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+      OUT_RELOC(src_buffer,
+		I915_GEM_DOMAIN_RENDER, 0,
 		src_offset);
       ADVANCE_BATCH();
    }
@@ -359,14 +362,20 @@ intelEmitCopyBlit(struct intel_context *intel,
       OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
       OUT_BATCH((0 << 16) | dst_x);
       OUT_BATCH((h << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+      OUT_RELOC(dst_buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		dst_offset + dst_y * dst_pitch);
       OUT_BATCH((0 << 16) | src_x);
       OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+      OUT_RELOC(src_buffer,
+		I915_GEM_DOMAIN_RENDER, 0,
 		src_offset + src_y * src_pitch);
       ADVANCE_BATCH();
    }
+   BEGIN_BATCH(1, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(MI_FLUSH);
+   ADVANCE_BATCH();
+   intel_batchbuffer_flush(intel->batch);
 }
 
 
@@ -538,7 +547,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
                OUT_BATCH(BR13);
                OUT_BATCH((b.y1 << 16) | b.x1);
                OUT_BATCH((b.y2 << 16) | b.x2);
-               OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+               OUT_RELOC(write_buffer,
+			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          irb_region->draw_offset);
                OUT_BATCH(clearVal);
                ADVANCE_BATCH();
@@ -611,7 +621,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_RELOC(dst_buffer,
+	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+	     dst_offset);
    OUT_BATCH(0); /* bg */
    OUT_BATCH(fg_color); /* fg */
    OUT_BATCH(0); /* pattern base addr */
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 951b8cbfb7..4227f0c973 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -45,8 +45,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel,
 			     struct intel_buffer_object *intel_obj)
 {
    intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
-				    intel_obj->Base.Size, 64,
-				    DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+				    intel_obj->Base.Size, 64);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index 2a25f079e9..d5257ae27e 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -848,6 +848,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv)
 
 	 intel_fb->swap_ust = ust;
       }
+      drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE);
+
    }
    else {
       /* XXX this shouldn't be an error but we can't handle it for now */
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
deleted file mode 100644
index 194814e8fb..0000000000
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ /dev/null
@@ -1,1122 +0,0 @@
-/**************************************************************************
- *
- * Copyright � 2007 Red Hat Inc.
- * Copyright � 2007 Intel Corporation
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-/*
- * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com>
- *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- *	    Eric Anholt <eric@anholt.net>
- *	    Dave Airlie <airlied@linux.ie>
- */
-
-#include <xf86drm.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "errno.h"
-#include "mtypes.h"
-#include "dri_bufmgr.h"
-#include "string.h"
-#include "imports.h"
-
-#include "i915_drm.h"
-
-#include "intel_bufmgr_ttm.h"
-#ifdef TTM_API
-
-#define DBG(...) do {					\
-   if (bufmgr_ttm->bufmgr.debug)			\
-      fprintf(stderr, __VA_ARGS__);			\
-} while (0)
-
-/*
- * These bits are always specified in each validation
- * request. Other bits are not supported at this point
- * as it would require a bit of investigation to figure
- * out what mask value should be used.
- */
-#define INTEL_BO_MASK  (DRM_BO_MASK_MEM | \
-			DRM_BO_FLAG_READ | \
-			DRM_BO_FLAG_WRITE | \
-			DRM_BO_FLAG_EXE)
-
-struct intel_validate_entry {
-    dri_bo *bo;
-    struct drm_i915_op_arg bo_arg;
-};
-
-struct dri_ttm_bo_bucket_entry {
-   drmBO drm_bo;
-   struct dri_ttm_bo_bucket_entry *next;
-};
-
-struct dri_ttm_bo_bucket {
-   struct dri_ttm_bo_bucket_entry *head;
-   struct dri_ttm_bo_bucket_entry **tail;
-   /**
-    * Limit on the number of entries in this bucket.
-    *
-    * 0 means that this caching at this bucket size is disabled.
-    * -1 means that there is no limit to caching at this size.
-    */
-   int max_entries;
-   int num_entries;
-};
-
-/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse
- * is 1 << 16 pages, or 256MB.
- */
-#define INTEL_TTM_BO_BUCKETS	16
-typedef struct _dri_bufmgr_ttm {
-    dri_bufmgr bufmgr;
-
-    int fd;
-    unsigned int fence_type;
-    unsigned int fence_type_flush;
-
-    uint32_t max_relocs;
-
-    struct intel_validate_entry *validate_array;
-    int validate_array_size;
-    int validate_count;
-
-    /** Array of lists of cached drmBOs of power-of-two sizes */
-    struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
-} dri_bufmgr_ttm;
-
-/**
- * Private information associated with a relocation that isn't already stored
- * in the relocation buffer to be passed to the kernel.
- */
-struct dri_ttm_reloc {
-    dri_bo *target_buf;
-    uint64_t validate_flags;
-    /** Offset of target_buf after last execution of this relocation entry. */
-    unsigned int last_target_offset;
-};
-
-typedef struct _dri_bo_ttm {
-    dri_bo bo;
-
-    int refcount;
-    unsigned int map_count;
-    drmBO drm_bo;
-    const char *name;
-
-    uint64_t last_flags;
-
-    /**
-     * Index of the buffer within the validation list while preparing a
-     * batchbuffer execution.
-     */
-    int validate_index;
-
-    /** DRM buffer object containing relocation list */
-    uint32_t *reloc_buf_data;
-    struct dri_ttm_reloc *relocs;
-
-    /**
-     * Indicates that the buffer may be shared with other processes, so we
-     * can't hold maps beyond when the user does.
-     */
-    GLboolean shared;
-
-    GLboolean delayed_unmap;
-    /* Virtual address from the dri_bo_map whose unmap was delayed. */
-    void *saved_virtual;
-} dri_bo_ttm;
-
-typedef struct _dri_fence_ttm
-{
-    dri_fence fence;
-
-    int refcount;
-    const char *name;
-    drmFence drm_fence;
-} dri_fence_ttm;
-
-static int
-logbase2(int n)
-{
-   GLint i = 1;
-   GLint log2 = 0;
-
-   while (n > i) {
-      i *= 2;
-      log2++;
-   }
-
-   return log2;
-}
-
-static struct dri_ttm_bo_bucket *
-dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
-{
-    int i;
-
-    /* We only do buckets in power of two increments */
-    if ((size & (size - 1)) != 0)
-	return NULL;
-
-    /* We should only see sizes rounded to pages. */
-    assert((size % 4096) == 0);
-
-    /* We always allocate in units of pages */
-    i = ffs(size / 4096) - 1;
-    if (i >= INTEL_TTM_BO_BUCKETS)
-	return NULL;
-
-    return &bufmgr_ttm->cache_bucket[i];
-}
-
-
-static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
-{
-    int i, j;
-
-    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
-	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
-	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-
-	if (bo_ttm->reloc_buf_data != NULL) {
-	    for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) {
-		uint32_t *reloc_entry = bo_ttm->reloc_buf_data +
-		    I915_RELOC_HEADER +
-		    j * I915_RELOC0_STRIDE;
-		dri_bo *target_bo = bo_ttm->relocs[j].target_buf;
-		dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo;
-
-		DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n",
-		    i,
-		    bo_ttm->name, reloc_entry[0],
-		    target_ttm->name, target_bo->offset,
-		    reloc_entry[1]);
-	    }
-	} else {
-	    DBG("%2d: %s\n", i, bo_ttm->name);
-	}
-    }
-}
-
-/**
- * Adds the given buffer to the list of buffers to be validated (moved into the
- * appropriate memory type) with the next batch submission.
- *
- * If a buffer is validated multiple times in a batch submission, it ends up
- * with the intersection of the memory type flags and the union of the
- * access flags.
- */
-static void
-intel_add_validate_buffer(dri_bo *buf,
-			  uint64_t flags)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
-    /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing,
-     * do that now.
-     */
-    if (ttm_buf->delayed_unmap) {
-	drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-	ttm_buf->delayed_unmap = GL_FALSE;
-    }
-
-    if (ttm_buf->validate_index == -1) {
-	struct intel_validate_entry *entry;
-	struct drm_i915_op_arg *arg;
-	struct drm_bo_op_req *req;
-	int index;
-
-	/* Extend the array of validation entries as necessary. */
-	if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) {
-	    int i, new_size = bufmgr_ttm->validate_array_size * 2;
-
-	    if (new_size == 0)
-		new_size = 5;
-
-	    bufmgr_ttm->validate_array =
-	       realloc(bufmgr_ttm->validate_array,
-		       sizeof(struct intel_validate_entry) * new_size);
-	    bufmgr_ttm->validate_array_size = new_size;
-
-	    /* Update pointers for realloced mem. */
-	    for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) {
-	       bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long)
-		  &bufmgr_ttm->validate_array[i + 1].bo_arg;
-	    }
-	}
-
-	/* Pick out the new array entry for ourselves */
-	index = bufmgr_ttm->validate_count;
-	ttm_buf->validate_index = index;
-	entry = &bufmgr_ttm->validate_array[index];
-	bufmgr_ttm->validate_count++;
-
-	/* Fill in array entry */
-	entry->bo = buf;
-	dri_bo_reference(buf);
-
-	/* Fill in kernel arg */
-	arg = &entry->bo_arg;
-	req = &arg->d.req;
-
-	memset(arg, 0, sizeof(*arg));
-	req->bo_req.handle = ttm_buf->drm_bo.handle;
-	req->op = drm_bo_validate;
-	req->bo_req.flags = flags;
-	req->bo_req.hint = 0;
-#ifdef DRM_BO_HINT_PRESUMED_OFFSET
-	/* PRESUMED_OFFSET indicates that all relocations pointing at this
-	 * buffer have the correct offset.  If any of our relocations don't,
-	 * this flag will be cleared off the buffer later in the relocation
-	 * processing.
-	 */
-	req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
-	req->bo_req.presumed_offset = buf->offset;
-#endif
-	req->bo_req.mask = INTEL_BO_MASK;
-	req->bo_req.fence_class = 0; /* Backwards compat. */
-
-	if (ttm_buf->reloc_buf_data != NULL)
- 	    arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data;
-	else
-	    arg->reloc_ptr = 0;
-
-	/* Hook up the linked list of args for the kernel */
-	arg->next = 0;
-	if (index != 0) {
-	    bufmgr_ttm->validate_array[index - 1].bo_arg.next =
-		(unsigned long)arg;
-	}
-    } else {
-	struct intel_validate_entry *entry =
-	    &bufmgr_ttm->validate_array[ttm_buf->validate_index];
-	struct drm_i915_op_arg *arg = &entry->bo_arg;
-	struct drm_bo_op_req *req = &arg->d.req;
-	uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM;
-	uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM;
-
-	/* Buffer was already in the validate list.  Extend its flags as
-	 * necessary.
-	 */
-
-	if (memFlags == 0) {
-	    fprintf(stderr,
-		    "%s: No shared memory types between "
-		    "0x%16llx and 0x%16llx\n",
-		    __FUNCTION__, req->bo_req.flags, flags);
-	    abort();
-	}
-	if (flags & ~INTEL_BO_MASK) {
-	    fprintf(stderr,
-		    "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n",
-		    __FUNCTION__, flags & ~INTEL_BO_MASK);
-	    abort();
-	}
-	req->bo_req.flags = memFlags | modeFlags;
-    }
-}
-
-
-#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
-	sizeof(uint32_t))
-
-static int
-intel_setup_reloc_list(dri_bo *bo)
-{
-    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
-
-    bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs,
-			    sizeof(struct dri_ttm_reloc));
-    bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs));
-
-    /* Initialize the relocation list with the header:
-     * DWORD 0: relocation count
-     * DWORD 1: relocation type  
-     * DWORD 2+3: handle to next relocation list (currently none) 64-bits
-     */
-    bo_ttm->reloc_buf_data[0] = 0;
-    bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0;
-    bo_ttm->reloc_buf_data[2] = 0;
-    bo_ttm->reloc_buf_data[3] = 0;
-
-    return 0;
-}
-
-#if 0
-int
-driFenceSignaled(DriFenceObject * fence, unsigned type)
-{
-    int signaled;
-    int ret;
-
-    if (fence == NULL)
-	return GL_TRUE;
-
-    ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
-    BM_CKFATAL(ret);
-    return signaled;
-}
-#endif
-
-static dri_bo *
-dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
-	      unsigned long size, unsigned int alignment,
-	      uint64_t location_mask)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
-    dri_bo_ttm *ttm_buf;
-    unsigned int pageSize = getpagesize();
-    int ret;
-    uint64_t flags;
-    unsigned int hint;
-    unsigned long alloc_size;
-    struct dri_ttm_bo_bucket *bucket;
-    GLboolean alloc_from_cache = GL_FALSE;
-
-    ttm_buf = calloc(1, sizeof(*ttm_buf));
-    if (!ttm_buf)
-	return NULL;
-
-    /* The mask argument doesn't do anything for us that we want other than
-     * determine which pool (TTM or local) the buffer is allocated into, so
-     * just pass all of the allocation class flags.
-     */
-    flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE |
-	DRM_BO_FLAG_EXE;
-    /* No hints we want to use. */
-    hint = 0;
-
-    /* Round the allocated size up to a power of two number of pages. */
-    alloc_size = 1 << logbase2(size);
-    if (alloc_size < pageSize)
-	alloc_size = pageSize;
-    bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
-
-    /* If we don't have caching at this size, don't actually round the
-     * allocation up.
-     */
-    if (bucket == NULL || bucket->max_entries == 0)
-	alloc_size = size;
-
-    /* Get a buffer out of the cache if available */
-    if (bucket != NULL && bucket->num_entries > 0) {
-	struct dri_ttm_bo_bucket_entry *entry = bucket->head;
-	int busy;
-
-	/* Check if the buffer is still in flight.  If not, reuse it. */
-	ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
-	alloc_from_cache = (ret == 0 && busy == 0);
-
-	if (alloc_from_cache) {
-	    bucket->head = entry->next;
-	    if (entry->next == NULL)
-		bucket->tail = &bucket->head;
-	    bucket->num_entries--;
-
-	    ttm_buf->drm_bo = entry->drm_bo;
-	    free(entry);
-	}
-    }
-
-    if (!alloc_from_cache) {
-	ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
-			  NULL, flags, hint, &ttm_buf->drm_bo);
-	if (ret != 0) {
-	    free(ttm_buf);
-	    return NULL;
-	}
-    }
-
-    ttm_buf->bo.size = size;
-    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
-    ttm_buf->bo.virtual = NULL;
-    ttm_buf->bo.bufmgr = bufmgr;
-    ttm_buf->name = name;
-    ttm_buf->refcount = 1;
-    ttm_buf->reloc_buf_data = NULL;
-    ttm_buf->relocs = NULL;
-    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
-    ttm_buf->shared = GL_FALSE;
-    ttm_buf->delayed_unmap = GL_FALSE;
-    ttm_buf->validate_index = -1;
-
-    DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size);
-
-    return &ttm_buf->bo;
-}
-
-/* Our TTM backend doesn't allow creation of static buffers, as that requires
- * privelege for the non-fake case, and the lock in the fake case where we were
- * working around the X Server not creating buffers and passing handles to us.
- */
-static dri_bo *
-dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name,
-		     unsigned long offset, unsigned long size, void *virtual,
-		     uint64_t location_mask)
-{
-    return NULL;
-}
-
-/**
- * Returns a dri_bo wrapping the given buffer object handle.
- *
- * This can be used when one application needs to pass a buffer object
- * to another.
- */
-dri_bo *
-intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
-			      unsigned int handle)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
-    dri_bo_ttm *ttm_buf;
-    int ret;
-
-    ttm_buf = calloc(1, sizeof(*ttm_buf));
-    if (!ttm_buf)
-	return NULL;
-
-    ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
-    if (ret != 0) {
-       fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
-	       name, handle, strerror(-ret));
-	free(ttm_buf);
-	return NULL;
-    }
-    ttm_buf->bo.size = ttm_buf->drm_bo.size;
-    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
-    ttm_buf->bo.virtual = NULL;
-    ttm_buf->bo.bufmgr = bufmgr;
-    ttm_buf->name = name;
-    ttm_buf->refcount = 1;
-    ttm_buf->reloc_buf_data = NULL;
-    ttm_buf->relocs = NULL;
-    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
-    ttm_buf->shared = GL_TRUE;
-    ttm_buf->delayed_unmap = GL_FALSE;
-    ttm_buf->validate_index = -1;
-
-    DBG("bo_create_from_handle: %p %08x (%s)\n",
-	&ttm_buf->bo, handle, ttm_buf->name);
-
-    return &ttm_buf->bo;
-}
-
-static void
-dri_ttm_bo_reference(dri_bo *buf)
-{
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
-    ttm_buf->refcount++;
-}
-
-static void
-dri_ttm_bo_unreference(dri_bo *buf)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-
-    if (!buf)
-	return;
-
-    if (--ttm_buf->refcount == 0) {
-	struct dri_ttm_bo_bucket *bucket;
-	int ret;
-
-	assert(ttm_buf->map_count == 0);
-
-	if (ttm_buf->reloc_buf_data) {
-	    int i;
-
-	    /* Unreference all the target buffers */
-	    for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++)
-		 dri_bo_unreference(ttm_buf->relocs[i].target_buf);
-	    free(ttm_buf->relocs);
-
-	    /* Free the kernel BO containing relocation entries */
-	    free(ttm_buf->reloc_buf_data);
-	    ttm_buf->reloc_buf_data = NULL;
-	}
-
-	if (ttm_buf->delayed_unmap) {
-	    int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-
-	    if (ret != 0) {
-		fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
-			__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
-	   }
-	}
-
-	bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
-	/* Put the buffer into our internal cache for reuse if we can. */
-	if (!ttm_buf->shared &&
-	    bucket != NULL &&
-	    (bucket->max_entries == -1 ||
-	     (bucket->max_entries > 0 &&
-	      bucket->num_entries < bucket->max_entries)))
-	{
-	    struct dri_ttm_bo_bucket_entry *entry;
-
-	    entry = calloc(1, sizeof(*entry));
-	    entry->drm_bo = ttm_buf->drm_bo;
-
-	    entry->next = NULL;
-	    *bucket->tail = entry;
-	    bucket->tail = &entry->next;
-	    bucket->num_entries++;
-	} else {
-	    /* Decrement the kernel refcount for the buffer. */
-	    ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-	    if (ret != 0) {
-	       fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
-		       ttm_buf->name, strerror(-ret));
-	    }
-	}
-
-	DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
-	free(buf);
-	return;
-    }
-}
-
-static int
-dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
-{
-    dri_bufmgr_ttm *bufmgr_ttm;
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-    uint64_t flags;
-    int ret;
-
-    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-
-    flags = DRM_BO_FLAG_READ;
-    if (write_enable)
-	flags |= DRM_BO_FLAG_WRITE;
-
-    /* Allow recursive mapping. Mesa may recursively map buffers with
-     * nested display loops.
-     */
-    if (ttm_buf->map_count++ != 0)
-	return 0;
-
-    assert(buf->virtual == NULL);
-
-    DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
-    /* XXX: What about if we're upgrading from READ to WRITE? */
-    if (ttm_buf->delayed_unmap) {
-	buf->virtual = ttm_buf->saved_virtual;
-	return 0;
-    }
-
-    ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
-    if (ret != 0) {
-        fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n",
-		__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
-    }
-
-    return ret;
-}
-
-static int
-dri_ttm_bo_unmap(dri_bo *buf)
-{
-    dri_bufmgr_ttm *bufmgr_ttm;
-    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
-    int ret;
-
-    if (buf == NULL)
-	return 0;
-
-    assert(ttm_buf->map_count != 0);
-    if (--ttm_buf->map_count != 0)
-	return 0;
-
-    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
-
-    assert(buf->virtual != NULL);
-
-    DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
-
-    if (!ttm_buf->shared) {
-	ttm_buf->saved_virtual = buf->virtual;
-	ttm_buf->delayed_unmap = GL_TRUE;
-	buf->virtual = NULL;
-
-	return 0;
-    }
-
-    buf->virtual = NULL;
-
-    ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-    if (ret != 0) {
-        fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
-		__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
-    }
-
-    return ret;
-}
-
-/**
- * Returns a dri_bo wrapping the given buffer object handle.
- *
- * This can be used when one application needs to pass a buffer object
- * to another.
- */
-dri_fence *
-intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
-				drm_fence_arg_t *arg)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
-    dri_fence_ttm *ttm_fence;
-
-    ttm_fence = malloc(sizeof(*ttm_fence));
-    if (!ttm_fence)
-	return NULL;
-
-    ttm_fence->drm_fence.handle = arg->handle;
-    ttm_fence->drm_fence.fence_class = arg->fence_class;
-    ttm_fence->drm_fence.type = arg->type;
-    ttm_fence->drm_fence.flags = arg->flags;
-    ttm_fence->drm_fence.signaled = 0;
-    ttm_fence->drm_fence.sequence = arg->sequence;
-
-    ttm_fence->fence.bufmgr = bufmgr;
-    ttm_fence->name = name;
-    ttm_fence->refcount = 1;
-
-    DBG("fence_create_from_handle: %p (%s)\n",
-	&ttm_fence->fence, ttm_fence->name);
-
-    return &ttm_fence->fence;
-}
-
-
-static void
-dri_ttm_fence_reference(dri_fence *fence)
-{
-    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
-
-    ++fence_ttm->refcount;
-    DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-}
-
-static void
-dri_ttm_fence_unreference(dri_fence *fence)
-{
-    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
-
-    if (!fence)
-	return;
-
-    DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-
-    if (--fence_ttm->refcount == 0) {
-	int ret;
-
-	ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence);
-	if (ret != 0) {
-	    fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
-		    fence_ttm->name, strerror(-ret));
-	}
-
-	free(fence);
-	return;
-    }
-}
-
-static void
-dri_ttm_fence_wait(dri_fence *fence)
-{
-    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
-    int ret;
-
-    ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
-    if (ret != 0) {
-        fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
-		__FILE__, __LINE__, fence_ttm->name, strerror(-ret));
-	abort();
-    }
-
-    DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
-}
-
-static void
-dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
-    int i;
-
-    free(bufmgr_ttm->validate_array);
-
-    /* Free any cached buffer objects we were going to reuse */
-    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
-	struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
-	struct dri_ttm_bo_bucket_entry *entry;
-
-	while ((entry = bucket->head) != NULL) {
-	    int ret;
-
-	    bucket->head = entry->next;
-	    if (entry->next == NULL)
-		bucket->tail = &bucket->head;
-	    bucket->num_entries--;
-
-	    /* Decrement the kernel refcount for the buffer. */
-	    ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
-	    if (ret != 0) {
-	       fprintf(stderr, "drmBOUnreference failed: %s\n",
-		       strerror(-ret));
-	    }
-
-	    free(entry);
-	}
-    }
-
-    free(bufmgr);
-}
-
-/**
- * Adds the target buffer to the validation list and adds the relocation
- * to the reloc_buffer's relocation list.
- *
- * The relocation entry at the given offset must already contain the
- * precomputed relocation value, because the kernel will optimize out
- * the relocation entry write when the buffer hasn't moved from the
- * last known offset in target_buf.
- */
-static int
-dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
-		   GLuint offset, dri_bo *target_buf)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
-    dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
-    dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf;
-    int num_relocs;
-    uint32_t *this_reloc;
-
-    /* Create a new relocation list if needed */
-    if (reloc_buf_ttm->reloc_buf_data == NULL)
-	intel_setup_reloc_list(reloc_buf);
-
-    num_relocs = reloc_buf_ttm->reloc_buf_data[0];
-
-    /* Check overflow */
-    assert(num_relocs < bufmgr_ttm->max_relocs);
-
-    this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER +
-	num_relocs * I915_RELOC0_STRIDE;
-
-    this_reloc[0] = offset;
-    this_reloc[1] = delta;
-    this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */
-    this_reloc[3] = 0;
-
-    reloc_buf_ttm->relocs[num_relocs].validate_flags = flags;
-    reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf;
-    dri_bo_reference(target_buf);
-
-    reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */
-    /* Check wraparound */
-    assert(reloc_buf_ttm->reloc_buf_data[0] != 0);
-    return 0;
-}
-
-/**
- * Walk the tree of relocations rooted at BO and accumulate the list of
- * validations to be performed and update the relocation buffers with
- * index values into the validation list.
- */
-static void
-dri_ttm_bo_process_reloc(dri_bo *bo)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
-    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-    unsigned int nr_relocs;
-    int i;
-
-    if (bo_ttm->reloc_buf_data == NULL)
-	return;
-
-    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
-
-    for (i = 0; i < nr_relocs; i++) {
-	struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
-
-	/* Continue walking the tree depth-first. */
-	dri_ttm_bo_process_reloc(r->target_buf);
-
-	/* Add the target to the validate list */
-	intel_add_validate_buffer(r->target_buf, r->validate_flags);
-
-	/* Clear the PRESUMED_OFFSET flag from the validate list entry of the
-	 * target if this buffer has a stale relocated pointer at it.
-	 */
-	if (r->last_target_offset != r->target_buf->offset) {
-	   dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf;
-	   struct intel_validate_entry *entry =
-	      &bufmgr_ttm->validate_array[target_buf_ttm->validate_index];
-
-	   entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET;
-	}
-    }
-}
-
-static void *
-dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
-
-    /* Update indices and set up the validate list. */
-    dri_ttm_bo_process_reloc(batch_buf);
-
-    /* Add the batch buffer to the validation list.  There are no relocations
-     * pointing to it.
-     */
-    intel_add_validate_buffer(batch_buf,
-			      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
-
-    *count = bufmgr_ttm->validate_count;
-    return &bufmgr_ttm->validate_array[0].bo_arg;
-}
-
-static const char *
-intel_get_flags_mem_type_string(uint64_t flags)
-{
-    switch (flags & DRM_BO_MASK_MEM) {
-    case DRM_BO_FLAG_MEM_LOCAL: return "local";
-    case DRM_BO_FLAG_MEM_TT: return "ttm";
-    case DRM_BO_FLAG_MEM_VRAM: return "vram";
-    case DRM_BO_FLAG_MEM_PRIV0: return "priv0";
-    case DRM_BO_FLAG_MEM_PRIV1: return "priv1";
-    case DRM_BO_FLAG_MEM_PRIV2: return "priv2";
-    case DRM_BO_FLAG_MEM_PRIV3: return "priv3";
-    case DRM_BO_FLAG_MEM_PRIV4: return "priv4";
-    default: return NULL;
-    }
-}
-
-static const char *
-intel_get_flags_caching_string(uint64_t flags)
-{
-    switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) {
-    case 0: return "UU";
-    case DRM_BO_FLAG_CACHED: return "CU";
-    case DRM_BO_FLAG_CACHED_MAPPED: return "UC";
-    case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC";
-    default: return NULL;
-    }
-}
-
-static void
-intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
-{
-    int i;
-
-    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
-	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
-	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-	struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg;
-	struct drm_bo_arg_rep *rep = &arg->d.rep;
-
-	/* Update the flags */
-	if (rep->bo_info.flags != bo_ttm->last_flags) {
-	    DBG("BO %s migrated: %s/%s -> %s/%s\n",
-		bo_ttm->name,
-		intel_get_flags_mem_type_string(bo_ttm->last_flags),
-		intel_get_flags_caching_string(bo_ttm->last_flags),
-		intel_get_flags_mem_type_string(rep->bo_info.flags),
-		intel_get_flags_caching_string(rep->bo_info.flags));
-
-	    bo_ttm->last_flags = rep->bo_info.flags;
-	}
-	/* Update the buffer offset */
-	if (rep->bo_info.offset != bo->offset) {
-	    DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n",
-		bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset);
-	    bo->offset = rep->bo_info.offset;
-	}
-    }
-}
-
-/**
- * Update the last target offset field of relocation entries for PRESUMED_OFFSET
- * computation.
- */
-static void
-dri_ttm_bo_post_submit(dri_bo *bo)
-{
-    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-    unsigned int nr_relocs;
-    int i;
-
-    if (bo_ttm->reloc_buf_data == NULL)
-	return;
-
-    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
-
-    for (i = 0; i < nr_relocs; i++) {
-	struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
-
-	/* Continue walking the tree depth-first. */
-	dri_ttm_bo_post_submit(r->target_buf);
-
-	r->last_target_offset = r->target_buf->offset;
-    }
-}
-
-static void
-dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
-    int i;
-
-    intel_update_buffer_offsets (bufmgr_ttm);
-
-    dri_ttm_bo_post_submit(batch_buf);
-
-    if (bufmgr_ttm->bufmgr.debug)
-	dri_ttm_dump_validation_list(bufmgr_ttm);
-
-    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
-	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
-	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
-
-	/* Disconnect the buffer from the validate list */
-	bo_ttm->validate_index = -1;
-	dri_bo_unreference(bo);
-	bufmgr_ttm->validate_array[i].bo = NULL;
-    }
-    bufmgr_ttm->validate_count = 0;
-}
-
-/**
- * Enables unlimited caching of buffer objects for reuse.
- *
- * This is potentially very memory expensive, as the cache at each bucket
- * size is only bounded by how many buffers of that size we've managed to have
- * in flight at once.
- */
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
-{
-    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
-    int i;
-
-    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
-	bufmgr_ttm->cache_bucket[i].max_entries = -1;
-    }
-}
-
-/*
- *
- */
-static int
-dri_ttm_check_aperture_space(dri_bo *bo)
-{
-    return 0;
-}
-
-/**
- * Initializes the TTM buffer manager, which uses the kernel to allocate, map,
- * and manage map buffer objections.
- *
- * \param fd File descriptor of the opened DRM device.
- * \param fence_type Driver-specific fence type used for fences with no flush.
- * \param fence_type_flush Driver-specific fence type used for fences with a
- *	  flush.
- */
-dri_bufmgr *
-intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
-		      unsigned int fence_type_flush, int batch_size)
-{
-    dri_bufmgr_ttm *bufmgr_ttm;
-    int i;
-
-    bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
-    bufmgr_ttm->fd = fd;
-    bufmgr_ttm->fence_type = fence_type;
-    bufmgr_ttm->fence_type_flush = fence_type_flush;
-
-    /* Let's go with one relocation per every 2 dwords (but round down a bit
-     * since a power of two will mean an extra page allocation for the reloc
-     * buffer).
-     *
-     * Every 4 was too few for the blender benchmark.
-     */
-    bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
-
-    bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
-    bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
-    bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference;
-    bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference;
-    bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map;
-    bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap;
-    bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference;
-    bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference;
-    bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait;
-    bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy;
-    bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
-    bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
-    bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
-    bufmgr_ttm->bufmgr.debug = GL_FALSE;
-    bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space;
-    /* Initialize the linked lists for BO reuse cache. */
-    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
-	bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
-
-    return &bufmgr_ttm->bufmgr;
-}
-#else
-dri_bufmgr *
-intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
-		      unsigned int fence_type_flush, int batch_size)
-{
-    return NULL;
-}
-
-dri_bo *
-intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
-			      unsigned int handle)
-{
-    return NULL;
-}
-
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
-{
-}
-#endif
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
deleted file mode 100644
index f5bd64c90f..0000000000
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-
-#ifndef INTEL_BUFMGR_TTM_H
-#define INTEL_BUFMGR_TTM_H
-
-#include "dri_bufmgr.h"
-
-extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
-					       unsigned int handle);
-
-#ifdef TTM_API
-dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
-					   drm_fence_arg_t *arg);
-#endif
-
-
-dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
-				  unsigned int fence_type_flush, int batch_size);
-
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
-
-#ifndef TTM_API
-#define DRM_I915_FENCE_CLASS_ACCEL 0
-#define DRM_I915_FENCE_TYPE_RW 2
-#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 671b3f68a3..6d7d6811ac 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -59,7 +59,7 @@
 #include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 #include "intel_decode.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
 
 #include "drirenderbuffer.h"
 #include "vblank.h"
@@ -367,20 +367,26 @@ intelFlush(GLcontext * ctx)
 
    if (intel->batch->map != intel->batch->ptr)
       intel_batchbuffer_flush(intel->batch);
-
-   /* XXX: Need to do an MI_FLUSH here.
-    */
 }
 
 void
 intelFinish(GLcontext * ctx)
 {
-   struct intel_context *intel = intel_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   int i;
+
    intelFlush(ctx);
-   if (intel->batch->last_fence) {
-      dri_fence_wait(intel->batch->last_fence);
-      dri_fence_unreference(intel->batch->last_fence);
-      intel->batch->last_fence = NULL;
+
+   for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+       struct intel_renderbuffer *irb;
+
+       irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+       /* NOTE(review): irb is presumably NULL for a missing/non-intel rb. */
+       if (irb && irb->region)
+	  dri_bo_wait_rendering(irb->region->buffer);
+   }
+   if (fb->_DepthBuffer) {
+      /* XXX: Wait on buffer idle */
    }
 }
 
@@ -446,28 +452,25 @@ static GLboolean
 intel_init_bufmgr(struct intel_context *intel)
 {
    intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
-   GLboolean ttm_supported;
+   GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL;
+   GLboolean gem_supported;
 
-   /* If we've got a new enough DDX that's initializing TTM and giving us
+   /* If we've got a new enough DDX that's initializing GEM and giving us
     * object handles for the shared buffers, use that.
     */
    intel->ttm = GL_FALSE;
    if (intel->intelScreen->driScrnPriv->dri2.enabled)
-       ttm_supported = GL_TRUE;
+       gem_supported = GL_TRUE;
    else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
 	    intel->intelScreen->drmMinor >= 11 &&
 	    intel->intelScreen->front.bo_handle != -1)
-       ttm_supported = GL_TRUE;
+       gem_supported = GL_TRUE;
    else
-       ttm_supported = GL_FALSE;
+       gem_supported = GL_FALSE;
 
-   if (!ttm_disable && ttm_supported) {
+   if (!gem_disable && gem_supported) {
       int bo_reuse_mode;
-      intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
-					    DRM_FENCE_TYPE_EXE,
-					    DRM_FENCE_TYPE_EXE |
-					    DRM_I915_FENCE_TYPE_RW,
+      intel->bufmgr = intel_bufmgr_gem_init(intel->driFd,
 					    BATCH_SZ);
       if (intel->bufmgr != NULL)
 	 intel->ttm = GL_TRUE;
@@ -477,16 +480,16 @@ intel_init_bufmgr(struct intel_context *intel)
       case DRI_CONF_BO_REUSE_DISABLED:
 	 break;
       case DRI_CONF_BO_REUSE_ALL:
-	 intel_ttm_enable_bo_reuse(intel->bufmgr);
+	 intel_bufmgr_gem_enable_reuse(intel->bufmgr);
 	 break;
       }
    }
    /* Otherwise, use the classic buffer manager. */
    if (intel->bufmgr == NULL) {
-      if (ttm_disable) {
-	 fprintf(stderr, "TTM buffer manager disabled.  Using classic.\n");
+      if (gem_disable) {
+	 fprintf(stderr, "GEM disabled.  Using classic.\n");
       } else {
-	 fprintf(stderr, "Failed to initialize TTM buffer manager.  "
+	 fprintf(stderr, "Failed to initialize GEM.  "
 		 "Falling back to classic.\n");
       }
 
@@ -496,14 +499,17 @@ intel_init_bufmgr(struct intel_context *intel)
 	 return GL_FALSE;
       }
 
-      intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
-					   intelScreen->tex.map,
-					   intelScreen->tex.size,
-					   intel_fence_emit,
-					   intel_fence_wait,
-					   intel);
+      intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset,
+					     intelScreen->tex.map,
+					     intelScreen->tex.size,
+					     intel_fence_emit,
+					     intel_fence_wait,
+					     intel);
    }
 
+   /* XXX bufmgr should be per-screen, not per-context */
+   intelScreen->ttm = intel->ttm;
+
    return GL_TRUE;
 }
 
@@ -671,8 +677,6 @@ intelInitContext(struct intel_context *intel,
       intel_recreate_static_regions(intel);
 
    intel->batch = intel_batchbuffer_alloc(intel);
-   intel->last_swap_fence = NULL;
-   intel->first_swap_fence = NULL;
 
    intel_bufferobj_init(intel);
    intel_fbo_init(intel);
@@ -690,7 +694,6 @@ intelInitContext(struct intel_context *intel,
    /* Force all software fallbacks */
    if (driQueryOptionb(&intel->optionCache, "no_rast")) {
       fprintf(stderr, "disabling 3D rasterization\n");
-      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
       intel->no_rast = 1;
    }
 
@@ -725,17 +728,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
       intel->Fallback = 0;      /* don't call _swrast_Flush later */
 
       intel_batchbuffer_free(intel->batch);
-
-      if (intel->last_swap_fence) {
-	 dri_fence_wait(intel->last_swap_fence);
-	 dri_fence_unreference(intel->last_swap_fence);
-	 intel->last_swap_fence = NULL;
-      }
-      if (intel->first_swap_fence) {
-	 dri_fence_wait(intel->first_swap_fence);
-	 dri_fence_unreference(intel->first_swap_fence);
-	 intel->first_swap_fence = NULL;
-      }
+      free(intel->prim.vb);
 
       if (release_texture_heaps) {
          /* This share group is about to go away, free our private
@@ -887,7 +880,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
     */
    if (!intel->ttm && sarea->texAge != intel->hHWContext) {
       sarea->texAge = intel->hHWContext;
-      dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
+      intel_bufmgr_fake_contended_lock_take(intel->bufmgr);
       if (INTEL_DEBUG & DEBUG_BATCH)
 	 intel_decode_context_reset();
       if (INTEL_DEBUG & DEBUG_BUFMGR)
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index df79ab8897..f1116d2747 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -35,6 +35,7 @@
 #include "mm.h"
 #include "texmem.h"
 #include "dri_bufmgr.h"
+#include "intel_bufmgr.h"
 
 #include "intel_screen.h"
 #include "intel_tex_obj.h"
@@ -85,6 +86,7 @@ struct intel_context
    {
       void (*destroy) (struct intel_context * intel);
       void (*emit_state) (struct intel_context * intel);
+      void (*finish_batch) (struct intel_context * intel);
       void (*new_batch) (struct intel_context * intel);
       void (*emit_invarient_state) (struct intel_context * intel);
       void (*note_fence) (struct intel_context *intel, GLuint fence);
@@ -174,9 +176,6 @@ struct intel_context
     */
    GLboolean ttm;
 
-   dri_fence *last_swap_fence;
-   dri_fence *first_swap_fence;
-
    struct intel_batchbuffer *batch;
    GLboolean no_batch_wrap;
    unsigned batch_id;
@@ -184,9 +183,13 @@ struct intel_context
    struct
    {
       GLuint id;
-      GLuint primitive;
-      GLubyte *start_ptr;
+      uint32_t primitive;	/**< Current hardware primitive type */
       void (*flush) (struct intel_context *);
+      dri_bo *vb_bo;
+      uint8_t *vb;
+      unsigned int start_offset; /**< Byte offset of primitive sequence */
+      unsigned int current_offset; /**< Byte offset of next vertex */
+      unsigned int count;	/**< Number of vertices in current primitive */
    } prim;
 
    GLuint stats_wm;
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index b3f6610546..bc0b579429 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -295,7 +295,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       rb->Height = height;
 
       /* This sets the Get/PutRow/Value functions */
-      intel_set_span_functions(&irb->Base);
+      /* XXX can we choose a different tile here? */
+      intel_set_span_functions(&irb->Base, INTEL_TILE_NONE);
 
       return GL_TRUE;
    }
@@ -375,7 +376,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
  * not a user-created renderbuffer.
  */
 struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat)
+intel_create_renderbuffer(GLenum intFormat, int tiling)
 {
    GET_CURRENT_CONTEXT(ctx);
 
@@ -442,12 +443,14 @@ intel_create_renderbuffer(GLenum intFormat)
 
    irb->Base.InternalFormat = intFormat;
 
+   irb->tiling = tiling;
+
    /* intel-specific methods */
    irb->Base.Delete = intel_delete_renderbuffer;
    irb->Base.AllocStorage = intel_alloc_window_storage;
    irb->Base.GetPointer = intel_get_pointer;
    /* This sets the Get/PutRow/Value functions */
-   intel_set_span_functions(&irb->Base);
+   intel_set_span_functions(&irb->Base, tiling);
 
    return irb;
 }
@@ -519,7 +522,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx,
 
 static GLboolean
 intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, 
-                          struct gl_texture_image *texImage)
+		     struct gl_texture_image *texImage)
 {
    if (texImage->TexFormat == &_mesa_texformat_argb8888) {
       irb->Base._ActualFormat = GL_RGBA8;
@@ -558,7 +561,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
 
    irb->Base.Delete = intel_delete_renderbuffer;
    irb->Base.AllocStorage = intel_nop_alloc_storage;
-   intel_set_span_functions(&irb->Base);
+   intel_set_span_functions(&irb->Base, irb->tiling);
 
    irb->RenderToTexture = GL_TRUE;
 
@@ -586,6 +589,9 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
    _mesa_init_renderbuffer(&irb->Base, name);
    irb->Base.ClassID = INTEL_RB_CLASS;
 
+   /* XXX can we fix this? */
+   irb->tiling = INTEL_TILE_NONE;
+
    if (!intel_update_wrapper(ctx, irb, texImage)) {
       _mesa_free(irb);
       return NULL;
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index c90c84b48c..9e085a1992 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -72,6 +72,7 @@ struct intel_renderbuffer
    struct intel_region *region;
    void *pfMap;                 /* possibly paged flipped map pointer */
    GLuint pfPitch;              /* possibly paged flipped pitch */
+   int tiling;
    GLboolean RenderToTexture;   /* RTT? */
 
    GLuint PairedDepth;   /**< only used if this is a depth renderbuffer */
@@ -90,7 +91,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb,
 			      struct intel_region *region);
 
 extern struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat);
+intel_create_renderbuffer(GLenum intFormat, int tiling);
 
 extern void intel_fbo_init(struct intel_context *intel);
 
diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c
index f4566ba89c..c0a18fa225 100644
--- a/src/mesa/drivers/dri/intel/intel_ioctl.c
+++ b/src/mesa/drivers/dri/intel/intel_ioctl.c
@@ -30,6 +30,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <sched.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
 
 #include "mtypes.h"
 #include "context.h"
@@ -43,7 +45,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
 
 #define FILE_DEBUG_FLAG DEBUG_IOCTL
 
@@ -104,7 +106,7 @@ intelWaitIrq(struct intel_context *intel, int seq)
 }
 
 
-void
+int
 intel_batch_ioctl(struct intel_context *intel,
                   GLuint start_offset,
                   GLuint used,
@@ -113,7 +115,7 @@ intel_batch_ioctl(struct intel_context *intel,
    struct drm_i915_batchbuffer batch;
 
    if (intel->no_hw)
-      return;
+      return 0;
 
    assert(intel->locked);
    assert(used);
@@ -142,82 +144,53 @@ intel_batch_ioctl(struct intel_context *intel,
    if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch,
                        sizeof(batch))) {
       fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
+      return -errno;
    }
+
+   return 0;
 }
 
 #ifdef TTM_API
-void
+int
 intel_exec_ioctl(struct intel_context *intel,
 		 GLuint used,
 		 GLboolean ignore_cliprects, GLboolean allow_unlock,
-		 void *start, GLuint count, dri_fence **fence)
+		 struct drm_i915_gem_execbuffer *execbuf)
 {
-   struct drm_i915_execbuffer execbuf;
-   dri_fence *fo;
    int ret;
 
    assert(intel->locked);
    assert(used);
 
    if (intel->no_hw)
-      return;
-
-   if (*fence) {
-     dri_fence_unreference(*fence);
-   }
+      return 0;
 
-   memset(&execbuf, 0, sizeof(execbuf));
-
-   execbuf.num_buffers = count;
-   execbuf.batch.used = used;
-   execbuf.batch.cliprects = intel->pClipRects;
-   execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
-   execbuf.batch.DR1 = 0;
-   execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
-			(((GLuint) intel->drawY) << 16));
-
-   execbuf.ops_list = (unsigned long)start; // TODO
-   execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
+   execbuf->batch_start_offset = 0;
+   execbuf->batch_len = used;
+   execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects;
+   execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   execbuf->DR1 = 0;
+   execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) |
+		   (((GLuint) intel->drawY) << 16));
 
    do {
-      ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
-				sizeof(execbuf));
+      ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf) < 0 ? -errno : 0;
    } while (ret == -EAGAIN);
 
    if (ret != 0) {
-      fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
+      fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", ret);
+      return ret;	/* negative errno captured right after ioctl() */
    }
 
-   if (execbuf.fence_arg.error != 0) {
-
-      /*
-       * Fence creation has failed, but the GPU has been
-       * idled by the kernel. Safe to continue.
-       */ 
-
-      *fence = NULL;
-      return;
-   }
-
-   fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
-					&execbuf.fence_arg);
-   if (!fo) {
-      fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
-   }
-   *fence = fo;
+   return 0;
 }
 #else
-void
-intel_exec_ioctl(struct intel_context *intel,
-		 GLuint used,
-		 GLboolean ignore_cliprects, GLboolean allow_unlock,
-		 void *start, GLuint count, dri_fence **fence)
+int
+intel_exec_ioctl(struct intel_context *intel,
+		 GLuint used,
+		 GLboolean ignore_cliprects, GLboolean allow_unlock,
+		 struct drm_i915_gem_execbuffer *execbuf)
 {
+    return -EINVAL;	/* stub for builds without TTM_API */
 }
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h
index 8674aef723..526e38358c 100644
--- a/src/mesa/drivers/dri/intel/intel_ioctl.h
+++ b/src/mesa/drivers/dri/intel/intel_ioctl.h
@@ -33,14 +33,14 @@
 void intelWaitIrq( struct intel_context *intel, int seq );
 int intelEmitIrqLocked( struct intel_context *intel );
 
-void intel_batch_ioctl( struct intel_context *intel, 
-			GLuint start_offset,
-			GLuint used,
-			GLboolean ignore_cliprects,
-			GLboolean allow_unlock );
-void intel_exec_ioctl(struct intel_context *intel,
+int intel_batch_ioctl(struct intel_context *intel,
+		      GLuint start_offset,
 		      GLuint used,
-		      GLboolean ignore_cliprects, GLboolean allow_unlock,
-		      void *start, GLuint count, dri_fence **fence);
+		      GLboolean ignore_cliprects,
+		      GLboolean allow_unlock);
+int intel_exec_ioctl(struct intel_context *intel,
+		     GLuint used,
+		     GLboolean ignore_cliprects, GLboolean allow_unlock,
+		     struct drm_i915_gem_execbuffer *execbuf);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index 34813d2aa0..569e992b5e 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -81,7 +81,8 @@ do_texture_drawpixels(GLcontext * ctx,
    else {
       /* PBO only for now:
        */
-/*       _mesa_printf("%s - not PBO\n", __FUNCTION__); */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 _mesa_printf("%s - not PBO\n", __FUNCTION__);
       return GL_FALSE;
    }
 
@@ -218,7 +219,6 @@ do_blit_drawpixels(GLcontext * ctx,
    struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
    GLuint src_offset;
    GLuint rowLength;
-   dri_fence *fence = NULL;
 
    if (INTEL_DEBUG & DEBUG_PIXEL)
       _mesa_printf("%s\n", __FUNCTION__);
@@ -323,16 +323,9 @@ do_blit_drawpixels(GLcontext * ctx,
 			   ctx->Color.LogicOp : GL_COPY);
       }
       intel_batchbuffer_flush(intel->batch);
-      fence = intel->batch->last_fence;
-      dri_fence_reference(fence);
    }
    UNLOCK_HARDWARE(intel);
 
-   if (fence) {
-      dri_fence_wait(fence);
-      dri_fence_unreference(fence);
-   }
-
    if (INTEL_DEBUG & DEBUG_PIXEL)
       _mesa_printf("%s - DONE\n", __FUNCTION__);
 
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 37629c07e2..96af7e1a03 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -31,11 +31,140 @@
 
 #define MI_BATCH_BUFFER_END		(CMD_MI | 0xA << 23)
 
+#define MI_FLUSH			(CMD_MI | (4 << 23))
+#define FLUSH_MAP_CACHE				(1 << 0)
+#define INHIBIT_FLUSH_RENDER_CACHE		(1 << 2)
+
 /* Stalls command execution waiting for the given events to have occurred. */
 #define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
 #define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
 #define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)
 
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   (CMD_3D | (0x1d<<24) | (0x04<<16))
+#define I1_LOAD_S(n)                      (1<<(4+(n)))	/* bit 4+n; n parenthesized so expression arguments expand safely */
+
+/** @{
+ * 915 definitions
+ */
+#define S0_VB_OFFSET_MASK		0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE	(1<<0)
+/** @} */
+
+/** @{
+ * 830 definitions
+ */
+#define S0_VB_OFFSET_MASK_830		0xffffff8
+#define S0_VB_PITCH_SHIFT_830		1
+#define S0_VB_ENABLE_830		0
+/** @} */
+
+#define S1_VERTEX_WIDTH_SHIFT          24
+#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT          16
+#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
+
+#define TEXCOORDFMT_2D                 0x0
+#define TEXCOORDFMT_3D                 0x1
+#define TEXCOORDFMT_4D                 0x2
+#define TEXCOORDFMT_1D                 0x3
+#define TEXCOORDFMT_2D_16              0x4
+#define TEXCOORDFMT_4D_16              0x5
+#define TEXCOORDFMT_NOT_PRESENT        0xf
+#define S2_TEXCOORD_FMT0_MASK            0xf
+#define S2_TEXCOORD_FMT1_SHIFT           4
+#define S2_TEXCOORD_FMT(unit, type)    ((type)<<((unit)*4))	/* 4-bit field per unit; unit parenthesized for expression arguments */
+#define S2_TEXCOORD_NONE               (~0)
+#define S2_TEX_COUNT_SHIFT_830		12
+#define S2_VERTEX_0_WIDTH_SHIFT_830	0
+#define S2_VERTEX_1_WIDTH_SHIFT_830	6
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT           23
+#define S4_POINT_WIDTH_MASK            (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT            19
+#define S4_LINE_WIDTH_ONE              (0x2<<19)
+#define S4_LINE_WIDTH_MASK             (0xf<<19)
+#define S4_FLATSHADE_ALPHA             (1<<18)
+#define S4_FLATSHADE_FOG               (1<<17)
+#define S4_FLATSHADE_SPECULAR          (1<<16)
+#define S4_FLATSHADE_COLOR             (1<<15)
+#define S4_CULLMODE_BOTH	       (0<<13)
+#define S4_CULLMODE_NONE	       (1<<13)
+#define S4_CULLMODE_CW		       (2<<13)
+#define S4_CULLMODE_CCW		       (3<<13)
+#define S4_CULLMODE_MASK	       (3<<13)
+#define S4_VFMT_POINT_WIDTH            (1<<12)
+#define S4_VFMT_SPEC_FOG               (1<<11)
+#define S4_VFMT_COLOR                  (1<<10)
+#define S4_VFMT_DEPTH_OFFSET           (1<<9)
+#define S4_VFMT_XYZ     	       (1<<6)
+#define S4_VFMT_XYZW     	       (2<<6)
+#define S4_VFMT_XY     		       (3<<6)
+#define S4_VFMT_XYW     	       (4<<6)
+#define S4_VFMT_XYZW_MASK              (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
+#define S4_VFMT_FOG_PARAM              (1<<2)
+#define S4_SPRITE_POINT_ENABLE         (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
+		      S4_VFMT_SPEC_FOG      |	\
+		      S4_VFMT_COLOR         |	\
+		      S4_VFMT_DEPTH_OFFSET  |	\
+		      S4_VFMT_XYZW_MASK     |	\
+		      S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA          (1<<31)
+#define S5_WRITEDISABLE_RED            (1<<30)
+#define S5_WRITEDISABLE_GREEN          (1<<29)
+#define S5_WRITEDISABLE_BLUE           (1<<28)
+#define S5_WRITEDISABLE_MASK           (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
+#define S5_LAST_PIXEL_ENABLE           (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
+#define S5_FOG_ENABLE                  (1<<24)
+#define S5_STENCIL_REF_SHIFT           16
+#define S5_STENCIL_REF_MASK            (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT     13
+#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT          10
+#define S5_STENCIL_FAIL_MASK           (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
+#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
+#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE        (1<<3)
+#define S5_STENCIL_TEST_ENABLE         (1<<2)
+#define S5_COLOR_DITHER_ENABLE         (1<<1)
+#define S5_LOGICOP_ENABLE              (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE           (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT       28
+#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
+#define S6_ALPHA_REF_SHIFT             20
+#define S6_ALPHA_REF_MASK              (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE           (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT       16
+#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE           (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT       12
+#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
+#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
+#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE          (1<<3)
+#define S6_COLOR_WRITE_ENABLE          (1<<2)
+#define S6_TRISTRIP_PV_SHIFT           0
+#define S6_TRISTRIP_PV_MASK            (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK     (~0)	/* parenthesized like S2_TEXCOORD_NONE */
+
 /* Primitive dispatch on 830-945 */
 #define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
 #define PRIM_INDIRECT            (1<<23)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 8bc548913f..c7e2c551dd 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -44,7 +44,7 @@
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "dri_bufmgr.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
@@ -106,10 +106,7 @@ intel_region_alloc(struct intel_context *intel,
    dri_bo *buffer;
 
    buffer = dri_bo_alloc(intel->bufmgr, "region",
-			 pitch * cpp * height, 64,
-			 DRM_BO_FLAG_MEM_LOCAL |
-			 DRM_BO_FLAG_CACHED |
-			 DRM_BO_FLAG_CACHED_MAPPED);
+			 pitch * cpp * height, 64);
 
    return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer);
 }
@@ -121,7 +118,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
 {
    dri_bo *buffer;
 
-   buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle);
+   buffer = intel_bo_gem_create_from_name(intel->bufmgr, "region", handle);
 
    return intel_region_alloc_internal(intel,
 				      cpp, pitch, height, tiled, buffer);
@@ -355,10 +352,7 @@ intel_region_release_pbo(struct intel_context *intel,
 
    region->buffer = dri_bo_alloc(intel->bufmgr, "region",
 				 region->pitch * region->cpp * region->height,
-				 64,
-				 DRM_BO_FLAG_MEM_LOCAL |
-				 DRM_BO_FLAG_CACHED |
-				 DRM_BO_FLAG_CACHED_MAPPED);
+				 64);
 }
 
 /* Break the COW tie to the pbo.  Both the pbo and the region end up
@@ -440,17 +434,16 @@ intel_recreate_static(struct intel_context *intel,
 
    if (intel->ttm) {
       assert(region_desc->bo_handle != -1);
-      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
-						       name,
-						       region_desc->bo_handle);
+      region->buffer = intel_bo_gem_create_from_name(intel->bufmgr,
+						     name,
+						     region_desc->bo_handle);
    } else {
-      region->buffer = dri_bo_alloc_static(intel->bufmgr,
-					   name,
-					   region_desc->offset,
-					   intelScreen->pitch *
-					   intelScreen->height,
-					   region_desc->map,
-					   DRM_BO_FLAG_MEM_TT);
+      region->buffer = intel_bo_fake_alloc_static(intel->bufmgr,
+						  name,
+						  region_desc->offset,
+						  intelScreen->pitch *
+						  intelScreen->height,
+						  region_desc->map);
    }
 
    assert(region->buffer != NULL);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 5233e58fc9..8fd503ee8b 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -49,7 +49,7 @@
 #include "i830_dri.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
 
 PUBLIC const char __driConfigOptions[] =
    DRI_CONF_BEGIN
@@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] =
       /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
        * DRI_CONF_BO_REUSE_ALL
        */
-      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
 	 DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
 	    DRI_CONF_ENUM(0, "Disable buffer object reuse")
 	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
@@ -221,16 +221,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea)
            sarea->height);
    fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
    fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
            sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle);
+           (unsigned) sarea->front_handle, sarea->front_tiled);
    fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
            sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           (unsigned) sarea->back_handle, sarea->back_tiled);
+   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
            sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle);
+           (unsigned) sarea->depth_handle, sarea->depth_tiled);
    fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
            sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
 }
@@ -531,20 +531,23 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 
       /* setup the hardware-based renderbuffers */
       {
-         intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
+         intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat, 
+							   screen->ttm ? screen->front.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
 				&intel_fb->color_rb[0]->Base);
       }
 
       if (mesaVis->doubleBufferMode) {
-         intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+         intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat,
+							   screen->ttm ? screen->back.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
 				&intel_fb->color_rb[1]->Base);
 
 	 if (screen->third.handle) {
 	    struct gl_renderbuffer *tmp_rb = NULL;
 
-	    intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat);
+	    intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat,
+							      screen->ttm ? screen->third.tiled : INTEL_TILE_NONE);
 	    _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base);
 	 }
       }
@@ -553,7 +556,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 	 if (mesaVis->stencilBits == 8) {
 	    /* combined depth/stencil buffer */
 	    struct intel_renderbuffer *depthStencilRb
-	       = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT);
+	       = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT,
+					   screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
 	    /* note: bind RB to two attachment points */
 	    _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
 				   &depthStencilRb->Base);
@@ -561,7 +565,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 				   &depthStencilRb->Base);
 	 } else {
 	    struct intel_renderbuffer *depthRb
-	       = intel_create_renderbuffer(GL_DEPTH_COMPONENT24);
+	       = intel_create_renderbuffer(GL_DEPTH_COMPONENT24,
+					   screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
 	    _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
 				   &depthRb->Base);
 	 }
@@ -569,7 +574,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
       else if (mesaVis->depthBits == 16) {
          /* just 16-bit depth buffer, no hw stencil */
          struct intel_renderbuffer *depthRb
-            = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+            = intel_create_renderbuffer(GL_DEPTH_COMPONENT16,
+					screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
       }
 
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index e62b2d7c89..9a73b13951 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -74,6 +74,8 @@ typedef struct
    int irq_active;
    int allow_batchbuffer;
 
+   int ttm;
+
    /**
    * Configuration cache with default values for all contexts
    */
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index df4f5927a0..c6778b16ff 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -40,6 +40,137 @@
 #include "swrast/swrast.h"
 
 /*
+ * Deal with tiled surfaces
+ */
+
+#if 0
+/* Pre-965 tile swizzling functions for power-of-two widths; compiled out by the enclosing #if 0. */
+static uintptr_t x_tile_swizzle_pow2 (uintptr_t addr, int n)
+{
+	uintptr_t	a = addr;
+	uintptr_t	base_mask = (((~0) << (n + 4)) | 0xff);	/* address bits copied through unchanged */
+	uintptr_t	x_mask = ((~0) << 12) & ~base_mask;	/* bits filled from (a << 3) */
+
+	a = ((a & base_mask) | 
+	     ((a >> (n-8)) & 0x7) |
+	     ((a << 3) & x_mask));
+	_mesa_printf ("x_swizzle %08x (base %x yrow %x tile#x %x xsword %x byte %x) %08x\n",
+		      addr,
+		      addr >> (n + 4),
+		      (addr >> (n + 1)) & 0x7,
+		      (addr >> 9) & ((1 << (n-8)) - 1),
+		      (addr >> 5) & 0xf,
+		      (addr & 0x1f),
+		      a);
+	return a;	/* the rearranged address */
+}
+
+static uintptr_t y_tile_swizzle_pow2 (uintptr_t addr, int n)
+{
+	uintptr_t	a = (uintptr_t) addr;
+	uintptr_t	base_mask = (((~0) << (n + 6)) | 0xf);	/* address bits copied through unchanged */
+	uintptr_t	x_mask = ((~0) << 9) & ~base_mask;	/* bits filled from (a << 5) */
+
+	a = ((a & base_mask) | 
+	     ((a >> (n-3)) & 0x1f) |
+	     ((a << 5) & x_mask));
+	_mesa_printf ("y_swizzle %08x (base %x yrow %x tile#x %x xoword %x byte %x) %08x\n",
+		      addr,
+		      addr >> (n + 6),
+		      (addr >> (n + 1)) & 0x01f,
+		      (addr >> 7) & ((1 << (n-6)) - 1),
+		      (addr >> 4) & 0x7,
+		      (addr & 0xf),
+		      a);
+	return a;	/* the rearranged address */
+}
+#endif
+
+static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel,
+			       int x, int y)
+{
+	GLubyte	*buf = (GLubyte *) irb->pfMap;
+	int	tile_stride;
+	int	xbyte;
+	int	x_tile_off, y_tile_off;
+	int	x_tile_number, y_tile_number;
+	int	tile_off, tile_base;
+	/* Returns the byte address inside irb->pfMap of pixel (x, y), using the X-tile addressing below. */
+	tile_stride = (irb->pfPitch * irb->region->cpp) << 3;	/* one row of tiles spans 8 scanlines (y >> 3) */
+	/* x and y are shifted by intel->drawX/drawY before the tile lookup. */
+	x += intel->drawX;
+	y += intel->drawY;
+
+	xbyte = x * irb->region->cpp;
+
+	x_tile_off = xbyte & 0x1ff;	/* byte column within a 512-byte-wide tile row */
+	y_tile_off = y & 7;		/* row within an 8-row tile */
+
+#ifndef I915
+	/* The documentation says that X tile layout is arranged in 8 512-byte
+	 * lines of pixel data.  However, that doesn't appear to be the case
+	 * on GM965, tested by drawing a 128x8 quad in no_rast mode.  For lines
+	 * 1,2,4, and 7 of each tile, each consecutive pair of 64-byte spans
+	 * has the locations of those spans swapped.
+	 */
+	switch (y_tile_off) {
+	case 1:
+	case 2:
+	case 4:
+	case 7:
+		x_tile_off ^= 64;	/* swap the two 64-byte spans of each pair */
+		break;
+	default:
+	   break;
+	}
+#endif
+
+	x_tile_number = xbyte >> 9;
+	y_tile_number = y >> 3;
+
+	tile_off = (y_tile_off << 9) + x_tile_off;	/* rows are 512 bytes apart inside a tile */
+	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;	/* 4096 bytes per tile */
+
+#if 0
+	printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n",
+	       x, y, tile_off, tile_base,
+	       tile_off + tile_base,
+	       irb->pfPitch, tile_stride);
+#endif
+
+	return buf + tile_base + tile_off;
+}
+
+static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel,
+			       int x, int y)
+{
+	GLubyte	*buf = (GLubyte *) irb->pfMap;
+	int	tile_stride;
+	int	xbyte;
+	int	x_tile_off, y_tile_off;
+	int	x_tile_number, y_tile_number;
+	int	tile_off, tile_base;
+	/* Returns the byte address inside irb->pfMap of pixel (x, y), using the Y-tile addressing below. */
+	tile_stride = (irb->pfPitch * irb->region->cpp) << 5;	/* one row of tiles spans 32 scanlines (y >> 5), not 8 as for X tiles */
+	/* x and y are shifted by intel->drawX/drawY before the tile lookup. */
+	x += intel->drawX;
+	y += intel->drawY;
+
+	xbyte = x * irb->region->cpp;
+
+	x_tile_off = xbyte & 0x7f;	/* byte column within a 128-byte-wide tile */
+	y_tile_off = y & 0x1f;		/* row within a 32-row tile */
+
+	x_tile_number = xbyte >> 7;
+	y_tile_number = y >> 5;
+	/* Inside a tile, data is laid out as 16-byte-wide columns of 32 rows each. */
+	tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf);
+	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;	/* 4096 bytes per tile */
+
+	return buf + tile_base + tile_off;
+}
+
+/*
   break intelWriteRGBASpan_ARGB8888
 */
 
@@ -55,7 +186,7 @@
       + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\
    GLuint p;								\
    assert(irb->pfMap);\
-   (void) p;
+   (void) p; (void) buf;
 
 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
  * the cliprect info from the context, not the driDrawable.
@@ -69,12 +200,14 @@
 	 int miny = intel->pClipRects[_nc].y1 - intel->drawY;		\
 	 int maxx = intel->pClipRects[_nc].x2 - intel->drawX;		\
 	 int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
-
-
-
+	
+#if 0
+      }}
+#endif
 
 #define Y_FLIP(_y) ((_y) * yScale + yBias)
 
+/* XXX with GEM, these need to tell the kernel */
 #define HW_LOCK()
 
 #define HW_UNLOCK()
@@ -99,6 +232,43 @@
 #define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4)
 #include "spantmp2.h"
 
+/* 16 bit RGB565 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_XTile_##x##_RGB565
+#define TAG2(x,y) intel_XTile_##x##_RGB565##y
+#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_YTile_##x##_RGB565
+#define TAG2(x,y) intel_YTile_##x##_RGB565##y
+#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+/* 32 bit ARGB8888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_XTile_##x##_ARGB8888
+#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y
+#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_YTile_##x##_ARGB8888
+#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y
+#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
 
 #define LOCAL_DEPTH_VARS						\
    struct intel_context *intel = intel_context(ctx);			\
@@ -107,7 +277,7 @@
    const GLint yScale = irb->RenderToTexture ? 1 : -1;			\
    const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;	\
    char *buf = (char *) irb->pfMap/*XXX use region->map*/ +             \
-      (intel->drawY * pitch + intel->drawX) * irb->region->cpp;
+      (intel->drawY * pitch + intel->drawX) * irb->region->cpp; (void) buf;
 
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
@@ -127,6 +297,33 @@
 
 
 /**
+ ** 16-bit x tile depthbuffer functions.
+ **/
+#define WRITE_DEPTH( _x, _y, d ) \
+   (*((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) = d)
+
+#define READ_DEPTH( d, _x, _y )	\
+   d = *((GLushort *)x_tile_swizzle (irb, intel, _x, _y))
+
+
+#define TAG(x) intel_XTile_##x##_z16
+#include "depthtmp.h"
+
+/**
+ ** 16-bit y tile depthbuffer functions.
+ **/
+#define WRITE_DEPTH( _x, _y, d ) \
+   (*((GLushort *)y_tile_swizzle (irb, intel, _x, _y)) = d)
+
+#define READ_DEPTH( d, _x, _y )	\
+   (d = *((GLushort *)y_tile_swizzle (irb, intel, _x, _y)))
+
+
+#define TAG(x) intel_YTile_##x##_z16
+#include "depthtmp.h"
+
+
+/**
  ** 24/8-bit interleaved depth/stencil functions
  ** Note: we're actually reading back combined depth+stencil values.
  ** The wrappers in main/depthstencil.c are used to extract the depth
@@ -149,6 +346,49 @@
 
 
 /**
+ ** 24/8-bit x-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH( _x, _y, d ) {				\
+   GLuint tmp = ((d) >> 8) | ((d) << 24);			\
+   *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)) = tmp;			\
+}
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {				\
+   GLuint tmp = *((GLuint *)x_tile_swizzle (irb, intel, _x, _y));		\
+   d = (tmp << 8) | (tmp >> 24);				\
+}
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ ** 24/8-bit y-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH( _x, _y, d ) {				\
+   GLuint tmp = ((d) >> 8) | ((d) << 24);			\
+   *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)) = tmp;			\
+}
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {				\
+   GLuint tmp = *((GLuint *)y_tile_swizzle (irb, intel, _x, _y));		\
+   d = (tmp << 8) | (tmp >> 24);				\
+}
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "depthtmp.h"
+
+
+/**
  ** 8-bit stencil function (XXX FBO: This is obsolete)
  **/
 #define WRITE_STENCIL( _x, _y, d ) {				\
@@ -164,6 +404,40 @@
 #define TAG(x) intel##x##_z24_s8
 #include "stenciltmp.h"
 
+/**
+ ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL( _x, _y, d ) {				\
+   GLuint *a = (GLuint *) x_tile_swizzle (irb, intel, _x, _y);  \
+   GLuint tmp = *a;					        \
+   tmp &= 0xffffff;						\
+   tmp |= ((d) << 24);						\
+   *a = tmp;						        \
+}
+
+#define READ_STENCIL( d, _x, _y )				\
+   (d = *((GLuint*) x_tile_swizzle (irb, intel, _x, _y)) >> 24)
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+/**
+ ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL( _x, _y, d ) {				\
+   GLuint *a = (GLuint *) y_tile_swizzle (irb, intel, _x, _y);  \
+   GLuint tmp = *a;					        \
+   tmp &= 0xffffff;						\
+   tmp |= ((d) << 24);						\
+   *a = tmp;						        \
+}
+
+#define READ_STENCIL( d, _x, _y )				\
+   (d = *((GLuint*) y_tile_swizzle (irb, intel, _x, _y)) >> 24)
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "stenciltmp.h"
+
 
 
 /**
@@ -379,25 +653,80 @@ intelInitSpanFuncs(GLcontext * ctx)
  * These are used for the software fallbacks.
  */
 void
-intel_set_span_functions(struct gl_renderbuffer *rb)
+intel_set_span_functions(struct gl_renderbuffer *rb, int tiling)
 {
    if (rb->_ActualFormat == GL_RGB5) {
       /* 565 RGB */
-      intelInitPointers_RGB565(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+	 intelInitPointers_RGB565(rb);
+	 break;
+      case INTEL_TILE_X:
+	 intel_XTile_InitPointers_RGB565(rb);
+	 break;
+      case INTEL_TILE_Y:
+	 intel_YTile_InitPointers_RGB565(rb);
+	 break;
+      }
    }
    else if (rb->_ActualFormat == GL_RGBA8) {
       /* 8888 RGBA */
-      intelInitPointers_ARGB8888(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+	 intelInitPointers_ARGB8888(rb);
+	 break;
+      case INTEL_TILE_X:
+	 intel_XTile_InitPointers_ARGB8888(rb);
+	 break;
+      case INTEL_TILE_Y:
+	 intel_YTile_InitPointers_ARGB8888(rb);
+	 break;
+      }
    }
    else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+	 intelInitDepthPointers_z16(rb);
+	 break;
+      case INTEL_TILE_X:
+	 intel_XTile_InitDepthPointers_z16(rb);
+	 break;
+      case INTEL_TILE_Y:
+	 intel_YTile_InitDepthPointers_z16(rb);
+	 break;
+      }
    }
    else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 ||        /* XXX FBO remove */
             rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
-      intelInitDepthPointers_z24_s8(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+	 intelInitDepthPointers_z24_s8(rb);
+	 break;
+      case INTEL_TILE_X:
+	 intel_XTile_InitDepthPointers_z24_s8(rb);
+	 break;
+      case INTEL_TILE_Y:
+	 intel_YTile_InitDepthPointers_z24_s8(rb);
+	 break;
+      }
    }
-   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {       /* XXX FBO remove */
-      intelInitStencilPointers_z24_s8(rb);
+   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+	 intelInitStencilPointers_z24_s8(rb);
+	 break;
+      case INTEL_TILE_X:
+	 intel_XTile_InitStencilPointers_z24_s8(rb);
+	 break;
+      case INTEL_TILE_Y:
+	 intel_YTile_InitStencilPointers_z24_s8(rb);
+	 break;
+      }
    }
    else {
       _mesa_problem(NULL,
diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h
index 5201f6d6c6..c56e5e1611 100644
--- a/src/mesa/drivers/dri/intel/intel_span.h
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@@ -33,6 +33,10 @@ extern void intelInitSpanFuncs(GLcontext * ctx);
 extern void intelSpanRenderFinish(GLcontext * ctx);
 extern void intelSpanRenderStart(GLcontext * ctx);
 
-extern void intel_set_span_functions(struct gl_renderbuffer *rb);
+extern void intel_set_span_functions(struct gl_renderbuffer *rb, int tiling);
+
+#define INTEL_TILE_NONE	0
+#define INTEL_TILE_X	1
+#define INTEL_TILE_Y	2
 
 #endif