1 files changed, 235 insertions, 732 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
index 803b41b256..045ff0a5b0 100644
--- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,805 +25,308 @@
  * 
  **************************************************************************/
 
-
-#include <stdio.h>
-#include <errno.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
-#include "vblank.h"
-
-#include "intel_reg.h"
 #include "intel_batchbuffer.h"
-#include "intel_context.h"
-
-
-
-
-/* ================================================================
- * Performance monitoring functions
+#include "intel_ioctl.h"
+#include "intel_decode.h"
+#include "i915_debug.h"
+
+/* Relocations in kernel space:
+ *    - pass dma buffer seperately
+ *    - memory manager knows how to patch
+ *    - pass list of dependent buffers
+ *    - pass relocation list
+ *
+ * Either:
+ *    - get back an offset for buffer to fire
+ *    - memory manager knows how to fire buffer
+ *
+ * Really want the buffer to be AGP and pinned.
+ *
  */
 
-static void intel_fill_box( intelContextPtr intel,
-			    GLshort x, GLshort y,
-			    GLshort w, GLshort h,
-			    GLubyte r, GLubyte g, GLubyte b )
-{
-   x += intel->drawX;
-   y += intel->drawY;
-
-   if (x >= 0 && y >= 0 &&
-       x+w < intel->intelScreen->width &&
-       y+h < intel->intelScreen->height)
-      intelEmitFillBlitLocked( intel, 
-			       intel->intelScreen->cpp,
-			       intel->intelScreen->back.pitch,
-			       intel->intelScreen->back.offset,
-			       x, y, w, h,
-			       INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
-					       r,g,b,0xff));
-}
+/* Cliprect fence: The highest fence protecting a dma buffer
+ * containing explicit cliprect information.  Like the old drawable
+ * lock but irq-driven.  X server must wait for this fence to expire
+ * before changing cliprects [and then doing sw rendering?].  For
+ * other dma buffers, the scheduler will grab current cliprect info
+ * and mix into buffer.  X server must hold the lock while changing
+ * cliprects???  Make per-drawable.  Need cliprects in shared memory
+ * -- beats storing them with every cmd buffer in the queue.
+ *
+ * ==> X server must wait for this fence to expire before touching the
+ * framebuffer with new cliprects.
+ *
+ * ==> Cliprect-dependent buffers associated with a
+ * cliprect-timestamp.  All of the buffers associated with a timestamp
+ * must go to hardware before any buffer with a newer timestamp.
+ *
+ * ==> Dma should be queued per-drawable for correct X/GL
+ * synchronization.  Or can fences be used for this?
+ *
+ * Applies to: Blit operations, metaops, X server operations -- X
+ * server automatically waits on its own dma to complete before
+ * modifying cliprects ???
+ */
 
-static void intel_draw_performance_boxes( intelContextPtr intel )
+void
+intel_batchbuffer_reset(struct intel_batchbuffer *batch)
 {
-   /* Purple box for page flipping
-    */
-   if ( intel->perf_boxes & I830_BOX_FLIP ) 
-      intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 );
-
-   /* Red box if we have to wait for idle at any point
-    */
-   if ( intel->perf_boxes & I830_BOX_WAIT ) 
-      intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 );
-
-   /* Blue box: lost context?
-    */
-   if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) 
-      intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 );
-
-   /* Yellow box for texture swaps
-    */
-   if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) 
-      intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 );
-
-   /* Green box if hardware never idles (as far as we can tell)
-    */
-   if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) 
-      intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 );
+   struct intel_context *intel = batch->intel;
 
-
-   /* Draw bars indicating number of buffers allocated 
-    * (not a great measure, easily confused)
-    */
-#if 0
-   if (intel->dma_used) {
-      int bar = intel->dma_used / 10240;
-      if (bar > 100) bar = 100;
-      if (bar < 1) bar = 1;
-      intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
-      intel->dma_used = 0;
+   if (batch->buf != NULL) {
+      dri_bo_unreference(batch->buf);
+      batch->buf = NULL;
    }
-#endif
 
-   intel->perf_boxes = 0;
+   batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
+			     intel->intelScreen->maxBatchSize, 4096,
+			     DRM_BO_FLAG_MEM_TT);
+   dri_bo_map(batch->buf, GL_TRUE);
+   batch->map = batch->buf->virtual;
+   batch->size = intel->intelScreen->maxBatchSize;
+   batch->ptr = batch->map;
 }
 
-
-
-
-
-
-static int bad_prim_vertex_nr( int primitive, int nr )
+struct intel_batchbuffer *
+intel_batchbuffer_alloc(struct intel_context *intel)
 {
-   switch (primitive & PRIM3D_MASK) {
-   case PRIM3D_POINTLIST:
-      return nr < 1;
-   case PRIM3D_LINELIST:
-      return (nr & 1) || nr == 0;
-   case PRIM3D_LINESTRIP:
-      return nr < 2;
-   case PRIM3D_TRILIST:
-   case PRIM3D_RECTLIST:
-      return nr % 3 || nr == 0;
-   case PRIM3D_POLY:
-   case PRIM3D_TRIFAN:
-   case PRIM3D_TRISTRIP:
-   case PRIM3D_TRISTRIP_RVRSE:
-      return nr < 3;
-   default:
-      return 1;
-   }	
-}
+   struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
 
-static void intel_flush_inline_primitive( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
-   GLuint vertcount;
-
-   assert(intel->prim.primitive != ~0);
-
-   if (1) {
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size )) 
-	 goto do_discard;
-
-      vertcount = (used - 4)/ (intel->vertex_size * 4);
-
-      if (!vertcount)
-	 goto do_discard;
-      
-      if (vertcount * intel->vertex_size * 4 != used - 4) {
-	 fprintf(stderr, "vertex size confusion %d %d\n", used, 
-		 intel->vertex_size * vertcount * 4);
-	 goto do_discard;
-      }
+   batch->intel = intel;
+   batch->last_fence = NULL;
+   intel_batchbuffer_reset(batch);
 
-      if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive,
-		 vertcount);
-	 goto do_discard;
-      }
-   }
-
-   if (used < 8)
-      goto do_discard;
-
-   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | 
-				    intel->prim.primitive |
-				    (used/4-2));
-
-   goto finished;
-   
- do_discard:
-   intel->batch.ptr -= used;
-   intel->batch.space += used;
-   assert(intel->batch.space >= 0);
-
- finished:
-   intel->prim.primitive = ~0;
-   intel->prim.start_ptr = 0;
-   intel->prim.flush = 0;
+   return batch;
 }
 
-
-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
+void
+intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
-
-
-   /* Finish any in-progress primitive:
-    */
-   INTEL_FIREVERTICES( intel );
-   
-   /* Emit outstanding state:
-    */
-   intel->vtbl.emit_state( intel );
-   
-   /* Make sure there is some space in this buffer:
-    */
-   if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) {
-      intelFlushBatch(intel, GL_TRUE); 
-      intel->vtbl.emit_state( intel );
+   if (batch->last_fence) {
+      dri_fence_wait(batch->last_fence);
+      dri_fence_unreference(batch->last_fence);
+      batch->last_fence = NULL;
    }
-
-#if 1
-   if (((unsigned long)intel->batch.ptr) & 0x4) {
-      BEGIN_BATCH(1);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
+   if (batch->map) {
+      dri_bo_unmap(batch->buf);
+      batch->map = NULL;
    }
-#endif
-
-   /* Emit a slot which will be filled with the inline primitive
-    * command later.
-    */
-   BEGIN_BATCH(2);
-   OUT_BATCH( 0 );
-
-   intel->prim.start_ptr = batch_ptr;
-   intel->prim.primitive = prim;
-   intel->prim.flush = intel_flush_inline_primitive;
-   intel->batch.contains_geometry = 1;
-
-   OUT_BATCH( 0 );
-   ADVANCE_BATCH();
+   dri_bo_unreference(batch->buf);
+   batch->buf = NULL;
+   free(batch);
 }
 
+static int
+relocation_sort(const void *a_in, const void *b_in) {
+   const struct buffer_reloc *a = a_in, *b = b_in;
 
-void intelRestartInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   intel_flush_inline_primitive( &intel->ctx );
-   if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */
-   intelStartInlinePrimitive( intel, prim );
+   return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1;
 }
 
 
-
-void intelWrapInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-   intel_flush_inline_primitive( &intel->ctx );
-   intelFlushBatch(intel, GL_TRUE);
-   intelStartInlinePrimitive( intel, prim );
-}
-
-
-/* Emit a primitive with space for inline vertices.
+/* TODO: Push this whole function into bufmgr.
  */
-GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
-				       int primitive,
-				       int dwords,
-				       int vertex_size )
+static void
+do_flush_locked(struct intel_batchbuffer *batch,
+                GLuint used,
+                GLboolean ignore_cliprects, GLboolean allow_unlock)
 {
-   GLuint *tmp = 0;
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);
-
-   /* Emit outstanding state:
+   GLuint *ptr;
+   GLuint i;
+   struct intel_context *intel = batch->intel;
+   dri_fence *fo;
+   GLboolean performed_rendering = GL_FALSE;
+
+   assert(batch->buf->virtual != NULL);
+   ptr = batch->buf->virtual;
+
+   /* Sort our relocation list in terms of referenced buffer pointer.
+    * This lets us uniquely validate the buffers with the sum of all the flags,
+    * while avoiding O(n^2) on number of relocations.
     */
-   intel->vtbl.emit_state( intel );
-
-   if ((1+dwords)*4 >= intel->batch.space) {
-      intelFlushBatch(intel, GL_TRUE); 
-      intel->vtbl.emit_state( intel );
-   }
-
-
-   if (1) {
-      int used = dwords * 4;
-      int vertcount;
+   qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]),
+	 relocation_sort);
 
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, vertex_size )) 
-	 goto do_discard;
-
-      vertcount = dwords / vertex_size;
-      
-      if (dwords % vertex_size) {
-	 fprintf(stderr, "did not request a whole number of vertices\n");
-	 goto do_discard;
-      }
+   /* Perform the necessary validations of buffers, and enter the relocations
+    * in the batchbuffer.
+    */
+   for (i = 0; i < batch->nr_relocs; i++) {
+      struct buffer_reloc *r = &batch->reloc[i];
+
+      if (r->validate_flags & DRM_BO_FLAG_WRITE)
+	 performed_rendering = GL_TRUE;
+
+      /* If this is the first time we've seen this buffer in the relocation
+       * list, figure out our flags and validate it.
+       */
+      if (i == 0 || batch->reloc[i - 1].buf != r->buf) {
+	 uint32_t validate_flags;
+	 int j, ret;
+
+	 /* Accumulate the flags we need for validating this buffer. */
+	 validate_flags = r->validate_flags;
+	 for (j = i + 1; j < batch->nr_relocs; j++) {
+	    if (batch->reloc[j].buf != r->buf)
+	       break;
+	    validate_flags |= batch->reloc[j].validate_flags;
+	 }
 
-      if (bad_prim_vertex_nr( primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount);
-	 goto do_discard;
+	 /* Validate.  If we fail, fence to clear the unfenced list and bail
+	  * out.
+	  */
+	 ret = dri_bo_validate(r->buf, validate_flags);
+	 if (ret != 0) {
+	    dri_bo_unmap(batch->buf);
+	    fo = dri_fence_validated(intel->intelScreen->bufmgr,
+				     "batchbuffer failure fence", GL_TRUE);
+	    dri_fence_unreference(fo);
+	    goto done;
+	 }
       }
-
-      if (used < 8)
-	 goto do_discard;
+      ptr[r->offset / 4] = r->buf->offset + r->delta;
+      dri_bo_unreference(r->buf);
    }
 
-   /* Emit 3D_PRIMITIVE commands:
-    */
-   BEGIN_BATCH(1 + dwords);
-   OUT_BATCH( _3DPRIMITIVE | 
-	      primitive |
-	      (dwords-1) );
-
-   tmp = (GLuint *)batch_ptr;
-   batch_ptr += dwords * 4;
-
-   ADVANCE_BATCH();
-
-   intel->batch.contains_geometry = 1;
-
- do_discard:
-   return tmp;
-}
+   dri_bo_unmap(batch->buf);
+   batch->map = NULL;
+   batch->ptr = NULL;
 
+   dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
 
-static void intelWaitForFrameCompletion( intelContextPtr intel )
-{
-  drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea;
+   batch->list_count = 0;
+   batch->nr_relocs = 0;
+   batch->flags = 0;
 
-   if (intel->do_irqs) {
-      if (intelGetLastFrame(intel) < sarea->last_dispatch) {
-	 if (!intel->irqsEmitted) {
-	    while (intelGetLastFrame (intel) < sarea->last_dispatch)
-	       ;
-	 }
-	 else {
-	    intelWaitIrq( intel, intel->alloc.irq_emitted );	
-	 }
-	 intel->irqsEmitted = 10;
-      }
+   /* Throw away non-effective packets.  Won't work once we have
+    * hardware contexts which would preserve statechanges beyond a
+    * single buffer.
+    */
 
-      if (intel->irqsEmitted) {
-	 LOCK_HARDWARE( intel ); 
-	 intelEmitIrqLocked( intel );
-	 intel->irqsEmitted--;
-	 UNLOCK_HARDWARE( intel ); 
-      }
-   } 
-   else {
-      while (intelGetLastFrame (intel) < sarea->last_dispatch) {
-	 if (intel->do_usleeps) 
-	    DO_USLEEP( 1 );
-      }
+   if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
+      intel_batch_ioctl(batch->intel,
+                        batch->buf->offset,
+                        used, ignore_cliprects, allow_unlock);
    }
-}
 
-/*
- * Copy the back buffer to the front buffer. 
- */
-void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
-		      const drm_clip_rect_t	 *rect)
-{
-   intelContextPtr intel;
-   const intelScreenPrivate *intelScreen;
-   GLboolean   missed_target;
-   int64_t ust;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-
-   intelFlush( &intel->ctx );
-   
-   intelScreen = intel->intelScreen;
-
-   if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 &&
-       !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) &&
-       intelScreen->current_rotation == 0) {
-      unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags);
-      unsigned int target;
-      drm_i915_vblank_swap_t swap;
-
-      swap.drawable = dPriv->hHWDrawable;
-      swap.seqtype = DRM_VBLANK_ABSOLUTE;
-      target = swap.sequence = intel->vbl_seq + interval;
-
-      if (intel->vblank_flags & VBLANK_FLAG_SYNC) {
-	 swap.seqtype |= DRM_VBLANK_NEXTONMISS;
-      } else if (interval == 0) {
-	 goto noschedule;
-      }
-
-      if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) {
-	 swap.seqtype |= DRM_VBLANK_SECONDARY;
-      }
+   /* Associate a fence with the validated buffers, and note that we included
+    * a flush at the end.
+    */
+   fo = dri_fence_validated(intel->intelScreen->bufmgr,
+			    "Batch fence", GL_TRUE);
 
-      if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
-                              sizeof(swap))) {
-        intel->swap_scheduled = 1;
-        intel->vbl_seq = swap.sequence;
-        swap.sequence -= target;
-        missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
-      }
+   if (performed_rendering) {
+      dri_fence_unreference(batch->last_fence);
+      batch->last_fence = fo;
    } else {
-      intel->swap_scheduled = 0;
+     /* If we didn't validate any buffers for writing by the card, we don't
+      * need to track the fence for glFinish().
+      */
+      dri_fence_unreference(fo);
    }
-noschedule:
-
-   if (!intel->swap_scheduled) {
-      intelWaitForFrameCompletion( intel );
-      LOCK_HARDWARE( intel );
 
-      if (!rect)
-      {
-	 UNLOCK_HARDWARE( intel );
-	 driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
-	 LOCK_HARDWARE( intel );
-      }
-      {
-	 const intelScreenPrivate *intelScreen = intel->intelScreen;
-	 const __DRIdrawablePrivate *dPriv = intel->driDrawable;
-	 const int nbox = dPriv->numClipRects;
-	 const drm_clip_rect_t *pbox = dPriv->pClipRects;
-	 drm_clip_rect_t box;
-	 const int cpp = intelScreen->cpp;
-	 const int pitch = intelScreen->front.pitch; /* in bytes */
-	 int i;
-	 GLuint CMD, BR13;
-	 BATCH_LOCALS;
-
-	 switch(cpp) {
-	 case 2: 
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
-	    CMD = XY_SRC_COPY_BLT_CMD;
-	    break;
-	 case 4:
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25);
-	    CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-		   XY_SRC_COPY_BLT_WRITE_RGB);
-	    break;
-	 default:
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
-	    CMD = XY_SRC_COPY_BLT_CMD;
-	    break;
-	 }
-   
-	 if (0) 
-	    intel_draw_performance_boxes( intel );
-
-	 for (i = 0 ; i < nbox; i++, pbox++) 
-	 {
-	    if (pbox->x1 > pbox->x2 ||
-		pbox->y1 > pbox->y2 ||
-		pbox->x2 > intelScreen->width ||
-		pbox->y2 > intelScreen->height) {
-	       _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()");
-	       continue;
-	    }
-
-	    box = *pbox;
-
-	    if (rect)
-	    {
-	       if (rect->x1 > box.x1)
-		  box.x1 = rect->x1;
-	       if (rect->y1 > box.y1)
-		  box.y1 = rect->y1;
-	       if (rect->x2 < box.x2)
-		  box.x2 = rect->x2;
-	       if (rect->y2 < box.y2)
-		  box.y2 = rect->y2;
-
-	       if (box.x1 > box.x2 || box.y1 > box.y2)
-		  continue;
-	    }
-
-	    BEGIN_BATCH( 8);
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (box.y1 << 16) | box.x1 );
-	    OUT_BATCH( (box.y2 << 16) | box.x2 );
-
-	    if (intel->sarea->pf_current_page == 0) 
-	       OUT_BATCH( intelScreen->front.offset );
-	    else
-	       OUT_BATCH( intelScreen->back.offset );			
-
-	    OUT_BATCH( (box.y1 << 16) | box.x1 );
-	    OUT_BATCH( BR13 & 0xffff );
-
-	    if (intel->sarea->pf_current_page == 0) 
-	       OUT_BATCH( intelScreen->back.offset );			
-	    else
-	       OUT_BATCH( intelScreen->front.offset );
-
-	    ADVANCE_BATCH();
-	 }
+   if (intel->numClipRects == 0 && !ignore_cliprects) {
+      if (allow_unlock) {
+	 /* If we are not doing any actual user-visible rendering,
+	  * do a sched_yield to keep the app from pegging the cpu while
+	  * achieving nothing.
+	  */
+         UNLOCK_HARDWARE(intel);
+         sched_yield();
+         LOCK_HARDWARE(intel);
       }
-      intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
-      UNLOCK_HARDWARE( intel );
+      intel->vtbl.lost_hardware(intel);
    }
 
-   if (!rect)
-   {
-       intel->swap_count++;
-       (*dri_interface->getUST)(&ust);
-       if (missed_target) {
-	   intel->swap_missed_count++;
-	   intel->swap_missed_ust = ust -  intel->swap_ust;
-       }
-   
-       intel->swap_ust = ust;
+done:
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      dri_bo_map(batch->buf, GL_FALSE);
+      intel_decode(ptr, used / 4, batch->buf->offset,
+		   intel->intelScreen->deviceID);
+      dri_bo_unmap(batch->buf);
    }
 }
 
 
-
-
-void intelEmitFillBlitLocked( intelContextPtr intel,
-			      GLuint cpp,
-			      GLshort dst_pitch,  /* in bytes */
-			      GLuint dst_offset,
-			      GLshort x, GLshort y, 
-			      GLshort w, GLshort h,
-			      GLuint color )
+void
+intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 {
-   GLuint BR13, CMD;
-   BATCH_LOCALS;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = dst_pitch | (0xF0 << 16) | (1<<24);
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25);
-      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
-	     XY_COLOR_BLT_WRITE_RGB);
-      break;
-   default:
-      return;
-   }
-
-   BEGIN_BATCH( 6);
-   OUT_BATCH( CMD );
-   OUT_BATCH( BR13 );
-   OUT_BATCH( (y << 16) | x );
-   OUT_BATCH( ((y+h) << 16) | (x+w) );
-   OUT_BATCH( dst_offset );
-   OUT_BATCH( color );
-   ADVANCE_BATCH();
-}
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;
+   GLboolean was_locked = intel->locked;
 
-
-/* Copy BitBlt
- */
-void intelEmitCopyBlitLocked( intelContextPtr intel,
-			      GLuint cpp,
-			      GLshort src_pitch,
-			      GLuint  src_offset,
-			      GLshort dst_pitch,
-			      GLuint  dst_offset,
-			      GLshort src_x, GLshort src_y,
-			      GLshort dst_x, GLshort dst_y,
-			      GLshort w, GLshort h )
-{
-   GLuint CMD, BR13;
-   int dst_y2 = dst_y + h;
-   int dst_x2 = dst_x + w;
-   BATCH_LOCALS;
-
-   src_pitch *= cpp;
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = dst_pitch | (0xCC << 16) | (1<<24);
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
-   case 4:
-      BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25);
-      CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-	     XY_SRC_COPY_BLT_WRITE_RGB);
-      break;
-   default:
+   if (used == 0)
       return;
-   }
-
-   if (dst_y2 < dst_y ||
-       dst_x2 < dst_x) {
-      return;
-   }
-
-   BEGIN_BATCH( 12);
-   OUT_BATCH( CMD );
-   OUT_BATCH( BR13 );
-   OUT_BATCH( (dst_y << 16) | dst_x );
-   OUT_BATCH( (dst_y2 << 16) | dst_x2 );
-   OUT_BATCH( dst_offset );	
-   OUT_BATCH( (src_y << 16) | src_x );
-   OUT_BATCH( src_pitch );
-   OUT_BATCH( src_offset ); 
-   ADVANCE_BATCH();
-}
-
-
 
-void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo,
-                        GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo)
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLuint clear_depth, clear_color;
-   GLint cx, cy, cw, ch;
-   GLboolean all;
-   GLint pitch;
-   GLint cpp = intelScreen->cpp;
-   GLint i;
-   GLuint BR13, CMD, D_CMD;
-   BATCH_LOCALS;
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-
-   /* get clear bounds after locking */
-   cx = intel->ctx.DrawBuffer->_Xmin;
-   cy = intel->ctx.DrawBuffer->_Ymin;
-   cw = intel->ctx.DrawBuffer->_Xmax - cx;
-   ch = intel->ctx.DrawBuffer->_Ymax - cy;
-   all = (cw == intel->ctx.DrawBuffer->Width &&
-          ch == intel->ctx.DrawBuffer->Height);
-
-   pitch = intelScreen->front.pitch;
-
-   clear_color = intel->ClearColor;
-   clear_depth = 0;
-
-   if (buffers & BUFFER_BIT_DEPTH) {
-      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
-   }
-
-   if (buffers & BUFFER_BIT_STENCIL) {
-      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
+   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
+    * performance drain that we would like to avoid.
+    */
+   if (used & 4) {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = 0;
+      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+      used += 12;
    }
-
-   switch(cpp) {
-   case 2: 
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
-      D_CMD = CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25);
-      CMD = (XY_COLOR_BLT_CMD |
-	     XY_COLOR_BLT_WRITE_ALPHA | 
-	     XY_COLOR_BLT_WRITE_RGB);
-      D_CMD = XY_COLOR_BLT_CMD;
-      if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB;
-      if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
-      break;
-   default:
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
-      D_CMD = CMD = XY_COLOR_BLT_CMD;
-      break;
+   else {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+      used += 8;
    }
 
-   {
-      /* flip top to bottom */
-      cy = intel->driDrawable->h - cy - ch;
-      cx = cx + intel->drawX;
-      cy += intel->drawY;
-
-      /* adjust for page flipping */
-      if ( intel->sarea->pf_current_page == 1 ) {
-	 GLuint tmp = buffers;
-
-	 buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
-	 if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT;
-	 if ( tmp & BUFFER_BIT_BACK_LEFT )  buffers |= BUFFER_BIT_FRONT_LEFT;
-      }
-
-      for (i = 0 ; i < intel->numClipRects ; i++) 
-      { 	 
-	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
-	 drm_clip_rect_t b;
-
-	 if (!all) {
-	    GLint x = box->x1;
-	    GLint y = box->y1;
-	    GLint w = box->x2 - x;
-	    GLint h = box->y2 - y;
-
-	    if (x < cx) w -= cx - x, x = cx; 
-	    if (y < cy) h -= cy - y, y = cy;
-	    if (x + w > cx + cw) w = cx + cw - x;
-	    if (y + h > cy + ch) h = cy + ch - y;
-	    if (w <= 0) continue;
-	    if (h <= 0) continue;
-
-	    b.x1 = x;
-	    b.y1 = y;
-	    b.x2 = x + w;
-	    b.y2 = y + h;      
-	 } else {
-	    b = *box;
-	 }
+   /* TODO: Just pass the relocation list and dma buffer up to the
+    * kernel.
+    */
+   if (!was_locked)
+      LOCK_HARDWARE(intel);
 
+   do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
+		   GL_FALSE);
 
-	 if (b.x1 > b.x2 ||
-	     b.y1 > b.y2 ||
-	     b.x2 > intelScreen->width ||
-	     b.y2 > intelScreen->height)
-	    continue;
-
-	 if ( buffers & BUFFER_BIT_FRONT_LEFT ) {	    
-	    BEGIN_BATCH( 6);	    
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->front.offset );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( buffers & BUFFER_BIT_BACK_LEFT ) {
-	    BEGIN_BATCH( 6); 
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->back.offset );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
+   if (!was_locked)
+      UNLOCK_HARDWARE(intel);
 
-	 if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
-	    BEGIN_BATCH( 6);
-	    OUT_BATCH( D_CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->depth.offset );
-	    OUT_BATCH( clear_depth );
-	    ADVANCE_BATCH();
-	 }      
-      }
-   }
-   intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
-   UNLOCK_HARDWARE( intel );
+   /* Reset the buffer:
+    */
+   intel_batchbuffer_reset(batch);
 }
 
-
-
-
-void intelDestroyBatchBuffer( GLcontext *ctx )
+void
+intel_batchbuffer_finish(struct intel_batchbuffer *batch)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
-   if (intel->alloc.offset) {
-      intelFreeAGP( intel, intel->alloc.ptr );
-      intel->alloc.ptr = NULL;
-      intel->alloc.offset = 0;
-   }
-   else if (intel->alloc.ptr) {
-      free(intel->alloc.ptr);
-      intel->alloc.ptr = NULL;
-   }
-
-   memset(&intel->batch, 0, sizeof(intel->batch));
+   intel_batchbuffer_flush(batch);
+   if (batch->last_fence != NULL)
+      dri_fence_wait(batch->last_fence);
 }
 
 
-void intelInitBatchBuffer( GLcontext *ctx )
+/*  This is the only way buffers get added to the validate list.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                             dri_bo *buffer,
+                             GLuint flags, GLuint delta)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
 
-   /* This path isn't really safe with rotate:
-    */
-   if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) {      
-      switch (intel->intelScreen->deviceID) {
-      case PCI_CHIP_I865_G:
-	 /* HW bug?  Seems to crash if batchbuffer crosses 4k boundary.
-	  */
-	 intel->alloc.size = 8 * 1024; 
-	 break;
-      default:
-	 /* This is the smallest amount of memory the kernel deals with.
-	  * We'd ideally like to make this smaller.
-	  */
-	 intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
-	 break;
-      }
+   assert(batch->nr_relocs <= MAX_RELOCS);
 
-      intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );
-      if (intel->alloc.ptr)
-	 intel->alloc.offset = 
-	    intelAgpOffsetFromVirtual( intel, intel->alloc.ptr );
-      else
-         intel->alloc.offset = 0; /* OK? */
-   }
+   dri_bo_reference(buffer);
+   r->buf = buffer;
+   r->offset = batch->ptr - batch->map;
+   r->delta = delta;
+   r->validate_flags = flags;
+
+   batch->ptr += 4;
+   return GL_TRUE;
+}
 
-   /* The default is now to use a local buffer and pass that to the
-    * kernel.  This is also a fallback if allocation fails on the
-    * above path:
-    */
-   if (!intel->alloc.ptr) {
-      intel->alloc.size = 8 * 1024;
-      intel->alloc.ptr = malloc( intel->alloc.size );
-      intel->alloc.offset = 0;
-   }
 
-   assert(intel->alloc.ptr);
+
+void
+intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                       const void *data, GLuint bytes, GLuint flags)
+{
+   assert((bytes & 3) == 0);
+   intel_batchbuffer_require_space(batch, bytes, flags);
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
 }