summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i915/intel_batchbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i915/intel_batchbuffer.c')
-rw-r--r--src/mesa/drivers/dri/i915/intel_batchbuffer.c967
1 files changed, 235 insertions, 732 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
index 803b41b256..045ff0a5b0 100644
--- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,805 +25,308 @@
*
**************************************************************************/
-
-#include <stdio.h>
-#include <errno.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
-#include "vblank.h"
-
-#include "intel_reg.h"
#include "intel_batchbuffer.h"
-#include "intel_context.h"
-
-
-
-
-/* ================================================================
- * Performance monitoring functions
+#include "intel_ioctl.h"
+#include "intel_decode.h"
+#include "i915_debug.h"
+
+/* Relocations in kernel space:
+ * - pass dma buffer seperately
+ * - memory manager knows how to patch
+ * - pass list of dependent buffers
+ * - pass relocation list
+ *
+ * Either:
+ * - get back an offset for buffer to fire
+ * - memory manager knows how to fire buffer
+ *
+ * Really want the buffer to be AGP and pinned.
+ *
*/
-static void intel_fill_box( intelContextPtr intel,
- GLshort x, GLshort y,
- GLshort w, GLshort h,
- GLubyte r, GLubyte g, GLubyte b )
-{
- x += intel->drawX;
- y += intel->drawY;
-
- if (x >= 0 && y >= 0 &&
- x+w < intel->intelScreen->width &&
- y+h < intel->intelScreen->height)
- intelEmitFillBlitLocked( intel,
- intel->intelScreen->cpp,
- intel->intelScreen->back.pitch,
- intel->intelScreen->back.offset,
- x, y, w, h,
- INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
- r,g,b,0xff));
-}
+/* Cliprect fence: The highest fence protecting a dma buffer
+ * containing explicit cliprect information. Like the old drawable
+ * lock but irq-driven. X server must wait for this fence to expire
+ * before changing cliprects [and then doing sw rendering?]. For
+ * other dma buffers, the scheduler will grab current cliprect info
+ * and mix into buffer. X server must hold the lock while changing
+ * cliprects??? Make per-drawable. Need cliprects in shared memory
+ * -- beats storing them with every cmd buffer in the queue.
+ *
+ * ==> X server must wait for this fence to expire before touching the
+ * framebuffer with new cliprects.
+ *
+ * ==> Cliprect-dependent buffers associated with a
+ * cliprect-timestamp. All of the buffers associated with a timestamp
+ * must go to hardware before any buffer with a newer timestamp.
+ *
+ * ==> Dma should be queued per-drawable for correct X/GL
+ * synchronization. Or can fences be used for this?
+ *
+ * Applies to: Blit operations, metaops, X server operations -- X
+ * server automatically waits on its own dma to complete before
+ * modifying cliprects ???
+ */
-static void intel_draw_performance_boxes( intelContextPtr intel )
+void
+intel_batchbuffer_reset(struct intel_batchbuffer *batch)
{
- /* Purple box for page flipping
- */
- if ( intel->perf_boxes & I830_BOX_FLIP )
- intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 );
-
- /* Red box if we have to wait for idle at any point
- */
- if ( intel->perf_boxes & I830_BOX_WAIT )
- intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 );
-
- /* Blue box: lost context?
- */
- if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT )
- intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 );
-
- /* Yellow box for texture swaps
- */
- if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD )
- intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 );
-
- /* Green box if hardware never idles (as far as we can tell)
- */
- if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) )
- intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 );
+ struct intel_context *intel = batch->intel;
-
- /* Draw bars indicating number of buffers allocated
- * (not a great measure, easily confused)
- */
-#if 0
- if (intel->dma_used) {
- int bar = intel->dma_used / 10240;
- if (bar > 100) bar = 100;
- if (bar < 1) bar = 1;
- intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
- intel->dma_used = 0;
+ if (batch->buf != NULL) {
+ dri_bo_unreference(batch->buf);
+ batch->buf = NULL;
}
-#endif
- intel->perf_boxes = 0;
+ batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
+ intel->intelScreen->maxBatchSize, 4096,
+ DRM_BO_FLAG_MEM_TT);
+ dri_bo_map(batch->buf, GL_TRUE);
+ batch->map = batch->buf->virtual;
+ batch->size = intel->intelScreen->maxBatchSize;
+ batch->ptr = batch->map;
}
-
-
-
-
-
-static int bad_prim_vertex_nr( int primitive, int nr )
+struct intel_batchbuffer *
+intel_batchbuffer_alloc(struct intel_context *intel)
{
- switch (primitive & PRIM3D_MASK) {
- case PRIM3D_POINTLIST:
- return nr < 1;
- case PRIM3D_LINELIST:
- return (nr & 1) || nr == 0;
- case PRIM3D_LINESTRIP:
- return nr < 2;
- case PRIM3D_TRILIST:
- case PRIM3D_RECTLIST:
- return nr % 3 || nr == 0;
- case PRIM3D_POLY:
- case PRIM3D_TRIFAN:
- case PRIM3D_TRISTRIP:
- case PRIM3D_TRISTRIP_RVRSE:
- return nr < 3;
- default:
- return 1;
- }
-}
+ struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
-static void intel_flush_inline_primitive( GLcontext *ctx )
-{
- intelContextPtr intel = INTEL_CONTEXT( ctx );
- GLuint used = intel->batch.ptr - intel->prim.start_ptr;
- GLuint vertcount;
-
- assert(intel->prim.primitive != ~0);
-
- if (1) {
- /* Check vertex size against the vertex we're specifying to
- * hardware. If it's wrong, ditch the primitive.
- */
- if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size ))
- goto do_discard;
-
- vertcount = (used - 4)/ (intel->vertex_size * 4);
-
- if (!vertcount)
- goto do_discard;
-
- if (vertcount * intel->vertex_size * 4 != used - 4) {
- fprintf(stderr, "vertex size confusion %d %d\n", used,
- intel->vertex_size * vertcount * 4);
- goto do_discard;
- }
+ batch->intel = intel;
+ batch->last_fence = NULL;
+ intel_batchbuffer_reset(batch);
- if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
- fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive,
- vertcount);
- goto do_discard;
- }
- }
-
- if (used < 8)
- goto do_discard;
-
- *(int *)intel->prim.start_ptr = (_3DPRIMITIVE |
- intel->prim.primitive |
- (used/4-2));
-
- goto finished;
-
- do_discard:
- intel->batch.ptr -= used;
- intel->batch.space += used;
- assert(intel->batch.space >= 0);
-
- finished:
- intel->prim.primitive = ~0;
- intel->prim.start_ptr = 0;
- intel->prim.flush = 0;
+ return batch;
}
-
-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
+void
+intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
- BATCH_LOCALS;
-
- if (0)
- fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
-
-
- /* Finish any in-progress primitive:
- */
- INTEL_FIREVERTICES( intel );
-
- /* Emit outstanding state:
- */
- intel->vtbl.emit_state( intel );
-
- /* Make sure there is some space in this buffer:
- */
- if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) {
- intelFlushBatch(intel, GL_TRUE);
- intel->vtbl.emit_state( intel );
+ if (batch->last_fence) {
+ dri_fence_wait(batch->last_fence);
+ dri_fence_unreference(batch->last_fence);
+ batch->last_fence = NULL;
}
-
-#if 1
- if (((unsigned long)intel->batch.ptr) & 0x4) {
- BEGIN_BATCH(1);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ if (batch->map) {
+ dri_bo_unmap(batch->buf);
+ batch->map = NULL;
}
-#endif
-
- /* Emit a slot which will be filled with the inline primitive
- * command later.
- */
- BEGIN_BATCH(2);
- OUT_BATCH( 0 );
-
- intel->prim.start_ptr = batch_ptr;
- intel->prim.primitive = prim;
- intel->prim.flush = intel_flush_inline_primitive;
- intel->batch.contains_geometry = 1;
-
- OUT_BATCH( 0 );
- ADVANCE_BATCH();
+ dri_bo_unreference(batch->buf);
+ batch->buf = NULL;
+ free(batch);
}
+static int
+relocation_sort(const void *a_in, const void *b_in) {
+ const struct buffer_reloc *a = a_in, *b = b_in;
-void intelRestartInlinePrimitive( intelContextPtr intel )
-{
- GLuint prim = intel->prim.primitive;
-
- intel_flush_inline_primitive( &intel->ctx );
- if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */
- intelStartInlinePrimitive( intel, prim );
+ return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1;
}
-
-void intelWrapInlinePrimitive( intelContextPtr intel )
-{
- GLuint prim = intel->prim.primitive;
-
- if (0)
- fprintf(stderr, "%s\n", __FUNCTION__);
- intel_flush_inline_primitive( &intel->ctx );
- intelFlushBatch(intel, GL_TRUE);
- intelStartInlinePrimitive( intel, prim );
-}
-
-
-/* Emit a primitive with space for inline vertices.
+/* TODO: Push this whole function into bufmgr.
*/
-GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel,
- int primitive,
- int dwords,
- int vertex_size )
+static void
+do_flush_locked(struct intel_batchbuffer *batch,
+ GLuint used,
+ GLboolean ignore_cliprects, GLboolean allow_unlock)
{
- GLuint *tmp = 0;
- BATCH_LOCALS;
-
- if (0)
- fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);
-
- /* Emit outstanding state:
+ GLuint *ptr;
+ GLuint i;
+ struct intel_context *intel = batch->intel;
+ dri_fence *fo;
+ GLboolean performed_rendering = GL_FALSE;
+
+ assert(batch->buf->virtual != NULL);
+ ptr = batch->buf->virtual;
+
+ /* Sort our relocation list in terms of referenced buffer pointer.
+ * This lets us uniquely validate the buffers with the sum of all the flags,
+ * while avoiding O(n^2) on number of relocations.
*/
- intel->vtbl.emit_state( intel );
-
- if ((1+dwords)*4 >= intel->batch.space) {
- intelFlushBatch(intel, GL_TRUE);
- intel->vtbl.emit_state( intel );
- }
-
-
- if (1) {
- int used = dwords * 4;
- int vertcount;
+ qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]),
+ relocation_sort);
- /* Check vertex size against the vertex we're specifying to
- * hardware. If it's wrong, ditch the primitive.
- */
- if (!intel->vtbl.check_vertex_size( intel, vertex_size ))
- goto do_discard;
-
- vertcount = dwords / vertex_size;
-
- if (dwords % vertex_size) {
- fprintf(stderr, "did not request a whole number of vertices\n");
- goto do_discard;
- }
+ /* Perform the necessary validations of buffers, and enter the relocations
+ * in the batchbuffer.
+ */
+ for (i = 0; i < batch->nr_relocs; i++) {
+ struct buffer_reloc *r = &batch->reloc[i];
+
+ if (r->validate_flags & DRM_BO_FLAG_WRITE)
+ performed_rendering = GL_TRUE;
+
+ /* If this is the first time we've seen this buffer in the relocation
+ * list, figure out our flags and validate it.
+ */
+ if (i == 0 || batch->reloc[i - 1].buf != r->buf) {
+ uint32_t validate_flags;
+ int j, ret;
+
+ /* Accumulate the flags we need for validating this buffer. */
+ validate_flags = r->validate_flags;
+ for (j = i + 1; j < batch->nr_relocs; j++) {
+ if (batch->reloc[j].buf != r->buf)
+ break;
+ validate_flags |= batch->reloc[j].validate_flags;
+ }
- if (bad_prim_vertex_nr( primitive, vertcount )) {
- fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount);
- goto do_discard;
+ /* Validate. If we fail, fence to clear the unfenced list and bail
+ * out.
+ */
+ ret = dri_bo_validate(r->buf, validate_flags);
+ if (ret != 0) {
+ dri_bo_unmap(batch->buf);
+ fo = dri_fence_validated(intel->intelScreen->bufmgr,
+ "batchbuffer failure fence", GL_TRUE);
+ dri_fence_unreference(fo);
+ goto done;
+ }
}
-
- if (used < 8)
- goto do_discard;
+ ptr[r->offset / 4] = r->buf->offset + r->delta;
+ dri_bo_unreference(r->buf);
}
- /* Emit 3D_PRIMITIVE commands:
- */
- BEGIN_BATCH(1 + dwords);
- OUT_BATCH( _3DPRIMITIVE |
- primitive |
- (dwords-1) );
-
- tmp = (GLuint *)batch_ptr;
- batch_ptr += dwords * 4;
-
- ADVANCE_BATCH();
-
- intel->batch.contains_geometry = 1;
-
- do_discard:
- return tmp;
-}
+ dri_bo_unmap(batch->buf);
+ batch->map = NULL;
+ batch->ptr = NULL;
+ dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
-static void intelWaitForFrameCompletion( intelContextPtr intel )
-{
- drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea;
+ batch->list_count = 0;
+ batch->nr_relocs = 0;
+ batch->flags = 0;
- if (intel->do_irqs) {
- if (intelGetLastFrame(intel) < sarea->last_dispatch) {
- if (!intel->irqsEmitted) {
- while (intelGetLastFrame (intel) < sarea->last_dispatch)
- ;
- }
- else {
- intelWaitIrq( intel, intel->alloc.irq_emitted );
- }
- intel->irqsEmitted = 10;
- }
+ /* Throw away non-effective packets. Won't work once we have
+ * hardware contexts which would preserve statechanges beyond a
+ * single buffer.
+ */
- if (intel->irqsEmitted) {
- LOCK_HARDWARE( intel );
- intelEmitIrqLocked( intel );
- intel->irqsEmitted--;
- UNLOCK_HARDWARE( intel );
- }
- }
- else {
- while (intelGetLastFrame (intel) < sarea->last_dispatch) {
- if (intel->do_usleeps)
- DO_USLEEP( 1 );
- }
+ if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
+ intel_batch_ioctl(batch->intel,
+ batch->buf->offset,
+ used, ignore_cliprects, allow_unlock);
}
-}
-/*
- * Copy the back buffer to the front buffer.
- */
-void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
- const drm_clip_rect_t *rect)
-{
- intelContextPtr intel;
- const intelScreenPrivate *intelScreen;
- GLboolean missed_target;
- int64_t ust;
-
- if (0)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- assert(dPriv);
- assert(dPriv->driContextPriv);
- assert(dPriv->driContextPriv->driverPrivate);
-
- intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-
- intelFlush( &intel->ctx );
-
- intelScreen = intel->intelScreen;
-
- if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 &&
- !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) &&
- intelScreen->current_rotation == 0) {
- unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags);
- unsigned int target;
- drm_i915_vblank_swap_t swap;
-
- swap.drawable = dPriv->hHWDrawable;
- swap.seqtype = DRM_VBLANK_ABSOLUTE;
- target = swap.sequence = intel->vbl_seq + interval;
-
- if (intel->vblank_flags & VBLANK_FLAG_SYNC) {
- swap.seqtype |= DRM_VBLANK_NEXTONMISS;
- } else if (interval == 0) {
- goto noschedule;
- }
-
- if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) {
- swap.seqtype |= DRM_VBLANK_SECONDARY;
- }
+ /* Associate a fence with the validated buffers, and note that we included
+ * a flush at the end.
+ */
+ fo = dri_fence_validated(intel->intelScreen->bufmgr,
+ "Batch fence", GL_TRUE);
- if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
- sizeof(swap))) {
- intel->swap_scheduled = 1;
- intel->vbl_seq = swap.sequence;
- swap.sequence -= target;
- missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
- }
+ if (performed_rendering) {
+ dri_fence_unreference(batch->last_fence);
+ batch->last_fence = fo;
} else {
- intel->swap_scheduled = 0;
+ /* If we didn't validate any buffers for writing by the card, we don't
+ * need to track the fence for glFinish().
+ */
+ dri_fence_unreference(fo);
}
-noschedule:
-
- if (!intel->swap_scheduled) {
- intelWaitForFrameCompletion( intel );
- LOCK_HARDWARE( intel );
- if (!rect)
- {
- UNLOCK_HARDWARE( intel );
- driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
- LOCK_HARDWARE( intel );
- }
- {
- const intelScreenPrivate *intelScreen = intel->intelScreen;
- const __DRIdrawablePrivate *dPriv = intel->driDrawable;
- const int nbox = dPriv->numClipRects;
- const drm_clip_rect_t *pbox = dPriv->pClipRects;
- drm_clip_rect_t box;
- const int cpp = intelScreen->cpp;
- const int pitch = intelScreen->front.pitch; /* in bytes */
- int i;
- GLuint CMD, BR13;
- BATCH_LOCALS;
-
- switch(cpp) {
- case 2:
- BR13 = (pitch) | (0xCC << 16) | (1<<24);
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25);
- CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
- XY_SRC_COPY_BLT_WRITE_RGB);
- break;
- default:
- BR13 = (pitch) | (0xCC << 16) | (1<<24);
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- }
-
- if (0)
- intel_draw_performance_boxes( intel );
-
- for (i = 0 ; i < nbox; i++, pbox++)
- {
- if (pbox->x1 > pbox->x2 ||
- pbox->y1 > pbox->y2 ||
- pbox->x2 > intelScreen->width ||
- pbox->y2 > intelScreen->height) {
- _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()");
- continue;
- }
-
- box = *pbox;
-
- if (rect)
- {
- if (rect->x1 > box.x1)
- box.x1 = rect->x1;
- if (rect->y1 > box.y1)
- box.y1 = rect->y1;
- if (rect->x2 < box.x2)
- box.x2 = rect->x2;
- if (rect->y2 < box.y2)
- box.y2 = rect->y2;
-
- if (box.x1 > box.x2 || box.y1 > box.y2)
- continue;
- }
-
- BEGIN_BATCH( 8);
- OUT_BATCH( CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (box.y1 << 16) | box.x1 );
- OUT_BATCH( (box.y2 << 16) | box.x2 );
-
- if (intel->sarea->pf_current_page == 0)
- OUT_BATCH( intelScreen->front.offset );
- else
- OUT_BATCH( intelScreen->back.offset );
-
- OUT_BATCH( (box.y1 << 16) | box.x1 );
- OUT_BATCH( BR13 & 0xffff );
-
- if (intel->sarea->pf_current_page == 0)
- OUT_BATCH( intelScreen->back.offset );
- else
- OUT_BATCH( intelScreen->front.offset );
-
- ADVANCE_BATCH();
- }
+ if (intel->numClipRects == 0 && !ignore_cliprects) {
+ if (allow_unlock) {
+ /* If we are not doing any actual user-visible rendering,
+ * do a sched_yield to keep the app from pegging the cpu while
+ * achieving nothing.
+ */
+ UNLOCK_HARDWARE(intel);
+ sched_yield();
+ LOCK_HARDWARE(intel);
}
- intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
- UNLOCK_HARDWARE( intel );
+ intel->vtbl.lost_hardware(intel);
}
- if (!rect)
- {
- intel->swap_count++;
- (*dri_interface->getUST)(&ust);
- if (missed_target) {
- intel->swap_missed_count++;
- intel->swap_missed_ust = ust - intel->swap_ust;
- }
-
- intel->swap_ust = ust;
+done:
+ if (INTEL_DEBUG & DEBUG_BATCH) {
+ dri_bo_map(batch->buf, GL_FALSE);
+ intel_decode(ptr, used / 4, batch->buf->offset,
+ intel->intelScreen->deviceID);
+ dri_bo_unmap(batch->buf);
}
}
-
-
-void intelEmitFillBlitLocked( intelContextPtr intel,
- GLuint cpp,
- GLshort dst_pitch, /* in bytes */
- GLuint dst_offset,
- GLshort x, GLshort y,
- GLshort w, GLshort h,
- GLuint color )
+void
+intel_batchbuffer_flush(struct intel_batchbuffer *batch)
{
- GLuint BR13, CMD;
- BATCH_LOCALS;
-
- switch(cpp) {
- case 1:
- case 2:
- case 3:
- BR13 = dst_pitch | (0xF0 << 16) | (1<<24);
- CMD = XY_COLOR_BLT_CMD;
- break;
- case 4:
- BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25);
- CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
- XY_COLOR_BLT_WRITE_RGB);
- break;
- default:
- return;
- }
-
- BEGIN_BATCH( 6);
- OUT_BATCH( CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (y << 16) | x );
- OUT_BATCH( ((y+h) << 16) | (x+w) );
- OUT_BATCH( dst_offset );
- OUT_BATCH( color );
- ADVANCE_BATCH();
-}
+ struct intel_context *intel = batch->intel;
+ GLuint used = batch->ptr - batch->map;
+ GLboolean was_locked = intel->locked;
-
-/* Copy BitBlt
- */
-void intelEmitCopyBlitLocked( intelContextPtr intel,
- GLuint cpp,
- GLshort src_pitch,
- GLuint src_offset,
- GLshort dst_pitch,
- GLuint dst_offset,
- GLshort src_x, GLshort src_y,
- GLshort dst_x, GLshort dst_y,
- GLshort w, GLshort h )
-{
- GLuint CMD, BR13;
- int dst_y2 = dst_y + h;
- int dst_x2 = dst_x + w;
- BATCH_LOCALS;
-
- src_pitch *= cpp;
- dst_pitch *= cpp;
-
- switch(cpp) {
- case 1:
- case 2:
- case 3:
- BR13 = dst_pitch | (0xCC << 16) | (1<<24);
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25);
- CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
- XY_SRC_COPY_BLT_WRITE_RGB);
- break;
- default:
+ if (used == 0)
return;
- }
-
- if (dst_y2 < dst_y ||
- dst_x2 < dst_x) {
- return;
- }
-
- BEGIN_BATCH( 12);
- OUT_BATCH( CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (dst_y << 16) | dst_x );
- OUT_BATCH( (dst_y2 << 16) | dst_x2 );
- OUT_BATCH( dst_offset );
- OUT_BATCH( (src_y << 16) | src_x );
- OUT_BATCH( src_pitch );
- OUT_BATCH( src_offset );
- ADVANCE_BATCH();
-}
-
-
-void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo,
- GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo)
-{
- intelContextPtr intel = INTEL_CONTEXT( ctx );
- intelScreenPrivate *intelScreen = intel->intelScreen;
- GLuint clear_depth, clear_color;
- GLint cx, cy, cw, ch;
- GLboolean all;
- GLint pitch;
- GLint cpp = intelScreen->cpp;
- GLint i;
- GLuint BR13, CMD, D_CMD;
- BATCH_LOCALS;
-
- intelFlush( &intel->ctx );
- LOCK_HARDWARE( intel );
-
- /* get clear bounds after locking */
- cx = intel->ctx.DrawBuffer->_Xmin;
- cy = intel->ctx.DrawBuffer->_Ymin;
- cw = intel->ctx.DrawBuffer->_Xmax - cx;
- ch = intel->ctx.DrawBuffer->_Ymax - cy;
- all = (cw == intel->ctx.DrawBuffer->Width &&
- ch == intel->ctx.DrawBuffer->Height);
-
- pitch = intelScreen->front.pitch;
-
- clear_color = intel->ClearColor;
- clear_depth = 0;
-
- if (buffers & BUFFER_BIT_DEPTH) {
- clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
- }
-
- if (buffers & BUFFER_BIT_STENCIL) {
- clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
+ /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a
+ * performance drain that we would like to avoid.
+ */
+ if (used & 4) {
+ ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+ ((int *) batch->ptr)[1] = 0;
+ ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+ used += 12;
}
-
- switch(cpp) {
- case 2:
- BR13 = (0xF0 << 16) | (pitch) | (1<<24);
- D_CMD = CMD = XY_COLOR_BLT_CMD;
- break;
- case 4:
- BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25);
- CMD = (XY_COLOR_BLT_CMD |
- XY_COLOR_BLT_WRITE_ALPHA |
- XY_COLOR_BLT_WRITE_RGB);
- D_CMD = XY_COLOR_BLT_CMD;
- if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB;
- if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
- break;
- default:
- BR13 = (0xF0 << 16) | (pitch) | (1<<24);
- D_CMD = CMD = XY_COLOR_BLT_CMD;
- break;
+ else {
+ ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+ ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+ used += 8;
}
- {
- /* flip top to bottom */
- cy = intel->driDrawable->h - cy - ch;
- cx = cx + intel->drawX;
- cy += intel->drawY;
-
- /* adjust for page flipping */
- if ( intel->sarea->pf_current_page == 1 ) {
- GLuint tmp = buffers;
-
- buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
- if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT;
- if ( tmp & BUFFER_BIT_BACK_LEFT ) buffers |= BUFFER_BIT_FRONT_LEFT;
- }
-
- for (i = 0 ; i < intel->numClipRects ; i++)
- {
- drm_clip_rect_t *box = &intel->pClipRects[i];
- drm_clip_rect_t b;
-
- if (!all) {
- GLint x = box->x1;
- GLint y = box->y1;
- GLint w = box->x2 - x;
- GLint h = box->y2 - y;
-
- if (x < cx) w -= cx - x, x = cx;
- if (y < cy) h -= cy - y, y = cy;
- if (x + w > cx + cw) w = cx + cw - x;
- if (y + h > cy + ch) h = cy + ch - y;
- if (w <= 0) continue;
- if (h <= 0) continue;
-
- b.x1 = x;
- b.y1 = y;
- b.x2 = x + w;
- b.y2 = y + h;
- } else {
- b = *box;
- }
+ /* TODO: Just pass the relocation list and dma buffer up to the
+ * kernel.
+ */
+ if (!was_locked)
+ LOCK_HARDWARE(intel);
+ do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
+ GL_FALSE);
- if (b.x1 > b.x2 ||
- b.y1 > b.y2 ||
- b.x2 > intelScreen->width ||
- b.y2 > intelScreen->height)
- continue;
-
- if ( buffers & BUFFER_BIT_FRONT_LEFT ) {
- BEGIN_BATCH( 6);
- OUT_BATCH( CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (b.y1 << 16) | b.x1 );
- OUT_BATCH( (b.y2 << 16) | b.x2 );
- OUT_BATCH( intelScreen->front.offset );
- OUT_BATCH( clear_color );
- ADVANCE_BATCH();
- }
-
- if ( buffers & BUFFER_BIT_BACK_LEFT ) {
- BEGIN_BATCH( 6);
- OUT_BATCH( CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (b.y1 << 16) | b.x1 );
- OUT_BATCH( (b.y2 << 16) | b.x2 );
- OUT_BATCH( intelScreen->back.offset );
- OUT_BATCH( clear_color );
- ADVANCE_BATCH();
- }
+ if (!was_locked)
+ UNLOCK_HARDWARE(intel);
- if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
- BEGIN_BATCH( 6);
- OUT_BATCH( D_CMD );
- OUT_BATCH( BR13 );
- OUT_BATCH( (b.y1 << 16) | b.x1 );
- OUT_BATCH( (b.y2 << 16) | b.x2 );
- OUT_BATCH( intelScreen->depth.offset );
- OUT_BATCH( clear_depth );
- ADVANCE_BATCH();
- }
- }
- }
- intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
- UNLOCK_HARDWARE( intel );
+ /* Reset the buffer:
+ */
+ intel_batchbuffer_reset(batch);
}
-
-
-
-void intelDestroyBatchBuffer( GLcontext *ctx )
+void
+intel_batchbuffer_finish(struct intel_batchbuffer *batch)
{
- intelContextPtr intel = INTEL_CONTEXT(ctx);
-
- if (intel->alloc.offset) {
- intelFreeAGP( intel, intel->alloc.ptr );
- intel->alloc.ptr = NULL;
- intel->alloc.offset = 0;
- }
- else if (intel->alloc.ptr) {
- free(intel->alloc.ptr);
- intel->alloc.ptr = NULL;
- }
-
- memset(&intel->batch, 0, sizeof(intel->batch));
+ intel_batchbuffer_flush(batch);
+ if (batch->last_fence != NULL)
+ dri_fence_wait(batch->last_fence);
}
-void intelInitBatchBuffer( GLcontext *ctx )
+/* This is the only way buffers get added to the validate list.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+ dri_bo *buffer,
+ GLuint flags, GLuint delta)
{
- intelContextPtr intel = INTEL_CONTEXT(ctx);
+ struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
- /* This path isn't really safe with rotate:
- */
- if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) {
- switch (intel->intelScreen->deviceID) {
- case PCI_CHIP_I865_G:
- /* HW bug? Seems to crash if batchbuffer crosses 4k boundary.
- */
- intel->alloc.size = 8 * 1024;
- break;
- default:
- /* This is the smallest amount of memory the kernel deals with.
- * We'd ideally like to make this smaller.
- */
- intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
- break;
- }
+ assert(batch->nr_relocs <= MAX_RELOCS);
- intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );
- if (intel->alloc.ptr)
- intel->alloc.offset =
- intelAgpOffsetFromVirtual( intel, intel->alloc.ptr );
- else
- intel->alloc.offset = 0; /* OK? */
- }
+ dri_bo_reference(buffer);
+ r->buf = buffer;
+ r->offset = batch->ptr - batch->map;
+ r->delta = delta;
+ r->validate_flags = flags;
+
+ batch->ptr += 4;
+ return GL_TRUE;
+}
- /* The default is now to use a local buffer and pass that to the
- * kernel. This is also a fallback if allocation fails on the
- * above path:
- */
- if (!intel->alloc.ptr) {
- intel->alloc.size = 8 * 1024;
- intel->alloc.ptr = malloc( intel->alloc.size );
- intel->alloc.offset = 0;
- }
- assert(intel->alloc.ptr);
+
+void
+intel_batchbuffer_data(struct intel_batchbuffer *batch,
+ const void *data, GLuint bytes, GLuint flags)
+{
+ assert((bytes & 3) == 0);
+ intel_batchbuffer_require_space(batch, bytes, flags);
+ __memcpy(batch->ptr, data, bytes);
+ batch->ptr += bytes;
}