diff options
author | Jouk <joukj@tarantella.nano.tudelft.nl> | 2007-10-02 15:17:23 +0200 |
---|---|---|
committer | Jouk <joukj@tarantella.nano.tudelft.nl> | 2007-10-02 15:17:23 +0200 |
commit | 584b84256b07e106cd7295495355eb21226465d7 (patch) | |
tree | 028d47d265d01f3f96787792fbd41d495d60856c /src/mesa/drivers/dri/i915/intel_batchbuffer.c | |
parent | eb9a5b6d5127858b01ec12672c999e7d25cd7aed (diff) | |
parent | de1d725f442caa4d8ecbac3256b5a33d1f4a1257 (diff) |
Merge branch 'master' of git+ssh://joukj@git.freedesktop.org/git/mesa/mesa
Diffstat (limited to 'src/mesa/drivers/dri/i915/intel_batchbuffer.c')
-rw-r--r-- | src/mesa/drivers/dri/i915/intel_batchbuffer.c | 967 |
1 files changed, 235 insertions, 732 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c index 803b41b256..045ff0a5b0 100644 --- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,805 +25,308 @@ * **************************************************************************/ - -#include <stdio.h> -#include <errno.h> - -#include "mtypes.h" -#include "context.h" -#include "enums.h" -#include "vblank.h" - -#include "intel_reg.h" #include "intel_batchbuffer.h" -#include "intel_context.h" - - - - -/* ================================================================ - * Performance monitoring functions +#include "intel_ioctl.h" +#include "intel_decode.h" +#include "i915_debug.h" + +/* Relocations in kernel space: + * - pass dma buffer seperately + * - memory manager knows how to patch + * - pass list of dependent buffers + * - pass relocation list + * + * Either: + * - get back an offset for buffer to fire + * - memory manager knows how to fire buffer + * + * Really want the buffer to be AGP and pinned. + * */ -static void intel_fill_box( intelContextPtr intel, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLubyte r, GLubyte g, GLubyte b ) -{ - x += intel->drawX; - y += intel->drawY; - - if (x >= 0 && y >= 0 && - x+w < intel->intelScreen->width && - y+h < intel->intelScreen->height) - intelEmitFillBlitLocked( intel, - intel->intelScreen->cpp, - intel->intelScreen->back.pitch, - intel->intelScreen->back.offset, - x, y, w, h, - INTEL_PACKCOLOR(intel->intelScreen->fbFormat, - r,g,b,0xff)); -} +/* Cliprect fence: The highest fence protecting a dma buffer + * containing explicit cliprect information. Like the old drawable + * lock but irq-driven. X server must wait for this fence to expire + * before changing cliprects [and then doing sw rendering?]. For + * other dma buffers, the scheduler will grab current cliprect info + * and mix into buffer. X server must hold the lock while changing + * cliprects??? Make per-drawable. Need cliprects in shared memory + * -- beats storing them with every cmd buffer in the queue. + * + * ==> X server must wait for this fence to expire before touching the + * framebuffer with new cliprects. + * + * ==> Cliprect-dependent buffers associated with a + * cliprect-timestamp. All of the buffers associated with a timestamp + * must go to hardware before any buffer with a newer timestamp. + * + * ==> Dma should be queued per-drawable for correct X/GL + * synchronization. Or can fences be used for this? + * + * Applies to: Blit operations, metaops, X server operations -- X + * server automatically waits on its own dma to complete before + * modifying cliprects ??? + */ -static void intel_draw_performance_boxes( intelContextPtr intel ) +void +intel_batchbuffer_reset(struct intel_batchbuffer *batch) { - /* Purple box for page flipping - */ - if ( intel->perf_boxes & I830_BOX_FLIP ) - intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 ); - - /* Red box if we have to wait for idle at any point - */ - if ( intel->perf_boxes & I830_BOX_WAIT ) - intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 ); - - /* Blue box: lost context? - */ - if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) - intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 ); - - /* Yellow box for texture swaps - */ - if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) - intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 ); - - /* Green box if hardware never idles (as far as we can tell) - */ - if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) - intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 ); + struct intel_context *intel = batch->intel; - - /* Draw bars indicating number of buffers allocated - * (not a great measure, easily confused) - */ -#if 0 - if (intel->dma_used) { - int bar = intel->dma_used / 10240; - if (bar > 100) bar = 100; - if (bar < 1) bar = 1; - intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 ); - intel->dma_used = 0; + if (batch->buf != NULL) { + dri_bo_unreference(batch->buf); + batch->buf = NULL; } -#endif - intel->perf_boxes = 0; + batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer", + intel->intelScreen->maxBatchSize, 4096, + DRM_BO_FLAG_MEM_TT); + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + batch->size = intel->intelScreen->maxBatchSize; + batch->ptr = batch->map; } - - - - - -static int bad_prim_vertex_nr( int primitive, int nr ) +struct intel_batchbuffer * +intel_batchbuffer_alloc(struct intel_context *intel) { - switch (primitive & PRIM3D_MASK) { - case PRIM3D_POINTLIST: - return nr < 1; - case PRIM3D_LINELIST: - return (nr & 1) || nr == 0; - case PRIM3D_LINESTRIP: - return nr < 2; - case PRIM3D_TRILIST: - case PRIM3D_RECTLIST: - return nr % 3 || nr == 0; - case PRIM3D_POLY: - case PRIM3D_TRIFAN: - case PRIM3D_TRISTRIP: - case PRIM3D_TRISTRIP_RVRSE: - return nr < 3; - default: - return 1; - } -} + struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); -static void intel_flush_inline_primitive( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - GLuint used = intel->batch.ptr - intel->prim.start_ptr; - GLuint vertcount; - - assert(intel->prim.primitive != ~0); - - if (1) { - /* Check vertex size against the vertex we're specifying to - * hardware. If it's wrong, ditch the primitive. - */ - if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size )) - goto do_discard; - - vertcount = (used - 4)/ (intel->vertex_size * 4); - - if (!vertcount) - goto do_discard; - - if (vertcount * intel->vertex_size * 4 != used - 4) { - fprintf(stderr, "vertex size confusion %d %d\n", used, - intel->vertex_size * vertcount * 4); - goto do_discard; - } + batch->intel = intel; + batch->last_fence = NULL; + intel_batchbuffer_reset(batch); - if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) { - fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive, - vertcount); - goto do_discard; - } - } - - if (used < 8) - goto do_discard; - - *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | - intel->prim.primitive | - (used/4-2)); - - goto finished; - - do_discard: - intel->batch.ptr -= used; - intel->batch.space += used; - assert(intel->batch.space >= 0); - - finished: - intel->prim.primitive = ~0; - intel->prim.start_ptr = 0; - intel->prim.flush = 0; + return batch; } - -/* Emit a primitive referencing vertices in a vertex buffer. - */ -void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim ) +void +intel_batchbuffer_free(struct intel_batchbuffer *batch) { - BATCH_LOCALS; - - if (0) - fprintf(stderr, "%s %x\n", __FUNCTION__, prim); - - - /* Finish any in-progress primitive: - */ - INTEL_FIREVERTICES( intel ); - - /* Emit outstanding state: - */ - intel->vtbl.emit_state( intel ); - - /* Make sure there is some space in this buffer: - */ - if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) { - intelFlushBatch(intel, GL_TRUE); - intel->vtbl.emit_state( intel ); + if (batch->last_fence) { + dri_fence_wait(batch->last_fence); + dri_fence_unreference(batch->last_fence); + batch->last_fence = NULL; } - -#if 1 - if (((unsigned long)intel->batch.ptr) & 0x4) { - BEGIN_BATCH(1); - OUT_BATCH(0); - ADVANCE_BATCH(); + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; } -#endif - - /* Emit a slot which will be filled with the inline primitive - * command later. - */ - BEGIN_BATCH(2); - OUT_BATCH( 0 ); - - intel->prim.start_ptr = batch_ptr; - intel->prim.primitive = prim; - intel->prim.flush = intel_flush_inline_primitive; - intel->batch.contains_geometry = 1; - - OUT_BATCH( 0 ); - ADVANCE_BATCH(); + dri_bo_unreference(batch->buf); + batch->buf = NULL; + free(batch); } +static int +relocation_sort(const void *a_in, const void *b_in) { + const struct buffer_reloc *a = a_in, *b = b_in; -void intelRestartInlinePrimitive( intelContextPtr intel ) -{ - GLuint prim = intel->prim.primitive; - - intel_flush_inline_primitive( &intel->ctx ); - if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */ - intelStartInlinePrimitive( intel, prim ); + return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1; } - -void intelWrapInlinePrimitive( intelContextPtr intel ) -{ - GLuint prim = intel->prim.primitive; - - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - intel_flush_inline_primitive( &intel->ctx ); - intelFlushBatch(intel, GL_TRUE); - intelStartInlinePrimitive( intel, prim ); -} - - -/* Emit a primitive with space for inline vertices. +/* TODO: Push this whole function into bufmgr. */ -GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, - int primitive, - int dwords, - int vertex_size ) +static void +do_flush_locked(struct intel_batchbuffer *batch, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock) { - GLuint *tmp = 0; - BATCH_LOCALS; - - if (0) - fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords); - - /* Emit outstanding state: + GLuint *ptr; + GLuint i; + struct intel_context *intel = batch->intel; + dri_fence *fo; + GLboolean performed_rendering = GL_FALSE; + + assert(batch->buf->virtual != NULL); + ptr = batch->buf->virtual; + + /* Sort our relocation list in terms of referenced buffer pointer. + * This lets us uniquely validate the buffers with the sum of all the flags, + * while avoiding O(n^2) on number of relocations. */ - intel->vtbl.emit_state( intel ); - - if ((1+dwords)*4 >= intel->batch.space) { - intelFlushBatch(intel, GL_TRUE); - intel->vtbl.emit_state( intel ); - } - - - if (1) { - int used = dwords * 4; - int vertcount; + qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]), + relocation_sort); - /* Check vertex size against the vertex we're specifying to - * hardware. If it's wrong, ditch the primitive. - */ - if (!intel->vtbl.check_vertex_size( intel, vertex_size )) - goto do_discard; - - vertcount = dwords / vertex_size; - - if (dwords % vertex_size) { - fprintf(stderr, "did not request a whole number of vertices\n"); - goto do_discard; - } + /* Perform the necessary validations of buffers, and enter the relocations + * in the batchbuffer. + */ + for (i = 0; i < batch->nr_relocs; i++) { + struct buffer_reloc *r = &batch->reloc[i]; + + if (r->validate_flags & DRM_BO_FLAG_WRITE) + performed_rendering = GL_TRUE; + + /* If this is the first time we've seen this buffer in the relocation + * list, figure out our flags and validate it. + */ + if (i == 0 || batch->reloc[i - 1].buf != r->buf) { + uint32_t validate_flags; + int j, ret; + + /* Accumulate the flags we need for validating this buffer. */ + validate_flags = r->validate_flags; + for (j = i + 1; j < batch->nr_relocs; j++) { + if (batch->reloc[j].buf != r->buf) + break; + validate_flags |= batch->reloc[j].validate_flags; + } - if (bad_prim_vertex_nr( primitive, vertcount )) { - fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount); - goto do_discard; + /* Validate. If we fail, fence to clear the unfenced list and bail + * out. + */ + ret = dri_bo_validate(r->buf, validate_flags); + if (ret != 0) { + dri_bo_unmap(batch->buf); + fo = dri_fence_validated(intel->intelScreen->bufmgr, + "batchbuffer failure fence", GL_TRUE); + dri_fence_unreference(fo); + goto done; + } } - - if (used < 8) - goto do_discard; + ptr[r->offset / 4] = r->buf->offset + r->delta; + dri_bo_unreference(r->buf); } - /* Emit 3D_PRIMITIVE commands: - */ - BEGIN_BATCH(1 + dwords); - OUT_BATCH( _3DPRIMITIVE | - primitive | - (dwords-1) ); - - tmp = (GLuint *)batch_ptr; - batch_ptr += dwords * 4; - - ADVANCE_BATCH(); - - intel->batch.contains_geometry = 1; - - do_discard: - return tmp; -} + dri_bo_unmap(batch->buf); + batch->map = NULL; + batch->ptr = NULL; + dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); -static void intelWaitForFrameCompletion( intelContextPtr intel ) -{ - drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea; + batch->list_count = 0; + batch->nr_relocs = 0; + batch->flags = 0; - if (intel->do_irqs) { - if (intelGetLastFrame(intel) < sarea->last_dispatch) { - if (!intel->irqsEmitted) { - while (intelGetLastFrame (intel) < sarea->last_dispatch) - ; - } - else { - intelWaitIrq( intel, intel->alloc.irq_emitted ); - } - intel->irqsEmitted = 10; - } + /* Throw away non-effective packets. Won't work once we have + * hardware contexts which would preserve statechanges beyond a + * single buffer. + */ - if (intel->irqsEmitted) { - LOCK_HARDWARE( intel ); - intelEmitIrqLocked( intel ); - intel->irqsEmitted--; - UNLOCK_HARDWARE( intel ); - } - } - else { - while (intelGetLastFrame (intel) < sarea->last_dispatch) { - if (intel->do_usleeps) - DO_USLEEP( 1 ); - } + if (!(intel->numClipRects == 0 && !ignore_cliprects)) { + intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, ignore_cliprects, allow_unlock); } -} -/* - * Copy the back buffer to the front buffer. - */ -void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect) -{ - intelContextPtr intel; - const intelScreenPrivate *intelScreen; - GLboolean missed_target; - int64_t ust; - - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; - - intelFlush( &intel->ctx ); - - intelScreen = intel->intelScreen; - - if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 && - !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) && - intelScreen->current_rotation == 0) { - unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags); - unsigned int target; - drm_i915_vblank_swap_t swap; - - swap.drawable = dPriv->hHWDrawable; - swap.seqtype = DRM_VBLANK_ABSOLUTE; - target = swap.sequence = intel->vbl_seq + interval; - - if (intel->vblank_flags & VBLANK_FLAG_SYNC) { - swap.seqtype |= DRM_VBLANK_NEXTONMISS; - } else if (interval == 0) { - goto noschedule; - } - - if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) { - swap.seqtype |= DRM_VBLANK_SECONDARY; - } + /* Associate a fence with the validated buffers, and note that we included + * a flush at the end. + */ + fo = dri_fence_validated(intel->intelScreen->bufmgr, + "Batch fence", GL_TRUE); - if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap, - sizeof(swap))) { - intel->swap_scheduled = 1; - intel->vbl_seq = swap.sequence; - swap.sequence -= target; - missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23); - } + if (performed_rendering) { + dri_fence_unreference(batch->last_fence); + batch->last_fence = fo; } else { - intel->swap_scheduled = 0; + /* If we didn't validate any buffers for writing by the card, we don't + * need to track the fence for glFinish(). + */ + dri_fence_unreference(fo); } -noschedule: - - if (!intel->swap_scheduled) { - intelWaitForFrameCompletion( intel ); - LOCK_HARDWARE( intel ); - if (!rect) - { - UNLOCK_HARDWARE( intel ); - driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target ); - LOCK_HARDWARE( intel ); - } - { - const intelScreenPrivate *intelScreen = intel->intelScreen; - const __DRIdrawablePrivate *dPriv = intel->driDrawable; - const int nbox = dPriv->numClipRects; - const drm_clip_rect_t *pbox = dPriv->pClipRects; - drm_clip_rect_t box; - const int cpp = intelScreen->cpp; - const int pitch = intelScreen->front.pitch; /* in bytes */ - int i; - GLuint CMD, BR13; - BATCH_LOCALS; - - switch(cpp) { - case 2: - BR13 = (pitch) | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - BR13 = (pitch) | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - } - - if (0) - intel_draw_performance_boxes( intel ); - - for (i = 0 ; i < nbox; i++, pbox++) - { - if (pbox->x1 > pbox->x2 || - pbox->y1 > pbox->y2 || - pbox->x2 > intelScreen->width || - pbox->y2 > intelScreen->height) { - _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()"); - continue; - } - - box = *pbox; - - if (rect) - { - if (rect->x1 > box.x1) - box.x1 = rect->x1; - if (rect->y1 > box.y1) - box.y1 = rect->y1; - if (rect->x2 < box.x2) - box.x2 = rect->x2; - if (rect->y2 < box.y2) - box.y2 = rect->y2; - - if (box.x1 > box.x2 || box.y1 > box.y2) - continue; - } - - BEGIN_BATCH( 8); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (box.y1 << 16) | box.x1 ); - OUT_BATCH( (box.y2 << 16) | box.x2 ); - - if (intel->sarea->pf_current_page == 0) - OUT_BATCH( intelScreen->front.offset ); - else - OUT_BATCH( intelScreen->back.offset ); - - OUT_BATCH( (box.y1 << 16) | box.x1 ); - OUT_BATCH( BR13 & 0xffff ); - - if (intel->sarea->pf_current_page == 0) - OUT_BATCH( intelScreen->back.offset ); - else - OUT_BATCH( intelScreen->front.offset ); - - ADVANCE_BATCH(); - } + if (intel->numClipRects == 0 && !ignore_cliprects) { + if (allow_unlock) { + /* If we are not doing any actual user-visible rendering, + * do a sched_yield to keep the app from pegging the cpu while + * achieving nothing. + */ + UNLOCK_HARDWARE(intel); + sched_yield(); + LOCK_HARDWARE(intel); } - intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE ); - UNLOCK_HARDWARE( intel ); + intel->vtbl.lost_hardware(intel); } - if (!rect) - { - intel->swap_count++; - (*dri_interface->getUST)(&ust); - if (missed_target) { - intel->swap_missed_count++; - intel->swap_missed_ust = ust - intel->swap_ust; - } - - intel->swap_ust = ust; +done: + if (INTEL_DEBUG & DEBUG_BATCH) { + dri_bo_map(batch->buf, GL_FALSE); + intel_decode(ptr, used / 4, batch->buf->offset, + intel->intelScreen->deviceID); + dri_bo_unmap(batch->buf); } } - - -void intelEmitFillBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort dst_pitch, /* in bytes */ - GLuint dst_offset, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ) +void +intel_batchbuffer_flush(struct intel_batchbuffer *batch) { - GLuint BR13, CMD; - BATCH_LOCALS; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = dst_pitch | (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - break; - default: - return; - } - - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_BATCH( dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} + struct intel_context *intel = batch->intel; + GLuint used = batch->ptr - batch->map; + GLboolean was_locked = intel->locked; - -/* Copy BitBlt - */ -void intelEmitCopyBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort src_pitch, - GLuint src_offset, - GLshort dst_pitch, - GLuint dst_offset, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h ) -{ - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = dst_pitch | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: + if (used == 0) return; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - BEGIN_BATCH( 12); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_BATCH( dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_BATCH( src_offset ); - ADVANCE_BATCH(); -} - - -void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo, - GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - intelScreenPrivate *intelScreen = intel->intelScreen; - GLuint clear_depth, clear_color; - GLint cx, cy, cw, ch; - GLboolean all; - GLint pitch; - GLint cpp = intelScreen->cpp; - GLint i; - GLuint BR13, CMD, D_CMD; - BATCH_LOCALS; - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - - /* get clear bounds after locking */ - cx = intel->ctx.DrawBuffer->_Xmin; - cy = intel->ctx.DrawBuffer->_Ymin; - cw = intel->ctx.DrawBuffer->_Xmax - cx; - ch = intel->ctx.DrawBuffer->_Ymax - cy; - all = (cw == intel->ctx.DrawBuffer->Width && - ch == intel->ctx.DrawBuffer->Height); - - pitch = intelScreen->front.pitch; - - clear_color = intel->ClearColor; - clear_depth = 0; - - if (buffers & BUFFER_BIT_DEPTH) { - clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); - } - - if (buffers & BUFFER_BIT_STENCIL) { - clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ + if (used & 4) { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = 0; + ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; + used += 12; } - - switch(cpp) { - case 2: - BR13 = (0xF0 << 16) | (pitch) | (1<<24); - D_CMD = CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | - XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - D_CMD = XY_COLOR_BLT_CMD; - if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB; - if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA; - break; - default: - BR13 = (0xF0 << 16) | (pitch) | (1<<24); - D_CMD = CMD = XY_COLOR_BLT_CMD; - break; + else { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; + used += 8; } - { - /* flip top to bottom */ - cy = intel->driDrawable->h - cy - ch; - cx = cx + intel->drawX; - cy += intel->drawY; - - /* adjust for page flipping */ - if ( intel->sarea->pf_current_page == 1 ) { - GLuint tmp = buffers; - - buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT); - if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT; - if ( tmp & BUFFER_BIT_BACK_LEFT ) buffers |= BUFFER_BIT_FRONT_LEFT; - } - - for (i = 0 ; i < intel->numClipRects ; i++) - { - drm_clip_rect_t *box = &intel->pClipRects[i]; - drm_clip_rect_t b; - - if (!all) { - GLint x = box->x1; - GLint y = box->y1; - GLint w = box->x2 - x; - GLint h = box->y2 - y; - - if (x < cx) w -= cx - x, x = cx; - if (y < cy) h -= cy - y, y = cy; - if (x + w > cx + cw) w = cx + cw - x; - if (y + h > cy + ch) h = cy + ch - y; - if (w <= 0) continue; - if (h <= 0) continue; - - b.x1 = x; - b.y1 = y; - b.x2 = x + w; - b.y2 = y + h; - } else { - b = *box; - } + /* TODO: Just pass the relocation list and dma buffer up to the + * kernel. + */ + if (!was_locked) + LOCK_HARDWARE(intel); + do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS), + GL_FALSE); - if (b.x1 > b.x2 || - b.y1 > b.y2 || - b.x2 > intelScreen->width || - b.y2 > intelScreen->height) - continue; - - if ( buffers & BUFFER_BIT_FRONT_LEFT ) { - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->front.offset ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( buffers & BUFFER_BIT_BACK_LEFT ) { - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->back.offset ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } + if (!was_locked) + UNLOCK_HARDWARE(intel); - if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) { - BEGIN_BATCH( 6); - OUT_BATCH( D_CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->depth.offset ); - OUT_BATCH( clear_depth ); - ADVANCE_BATCH(); - } - } - } - intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE ); - UNLOCK_HARDWARE( intel ); + /* Reset the buffer: + */ + intel_batchbuffer_reset(batch); } - - - -void intelDestroyBatchBuffer( GLcontext *ctx ) +void +intel_batchbuffer_finish(struct intel_batchbuffer *batch) { - intelContextPtr intel = INTEL_CONTEXT(ctx); - - if (intel->alloc.offset) { - intelFreeAGP( intel, intel->alloc.ptr ); - intel->alloc.ptr = NULL; - intel->alloc.offset = 0; - } - else if (intel->alloc.ptr) { - free(intel->alloc.ptr); - intel->alloc.ptr = NULL; - } - - memset(&intel->batch, 0, sizeof(intel->batch)); + intel_batchbuffer_flush(batch); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); } -void intelInitBatchBuffer( GLcontext *ctx ) +/* This is the only way buffers get added to the validate list. + */ +GLboolean +intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + GLuint flags, GLuint delta) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++]; - /* This path isn't really safe with rotate: - */ - if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) { - switch (intel->intelScreen->deviceID) { - case PCI_CHIP_I865_G: - /* HW bug? Seems to crash if batchbuffer crosses 4k boundary. - */ - intel->alloc.size = 8 * 1024; - break; - default: - /* This is the smallest amount of memory the kernel deals with. - * We'd ideally like to make this smaller. - */ - intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity; - break; - } + assert(batch->nr_relocs <= MAX_RELOCS); - intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size ); - if (intel->alloc.ptr) - intel->alloc.offset = - intelAgpOffsetFromVirtual( intel, intel->alloc.ptr ); - else - intel->alloc.offset = 0; /* OK? */ - } + dri_bo_reference(buffer); + r->buf = buffer; + r->offset = batch->ptr - batch->map; + r->delta = delta; + r->validate_flags = flags; + + batch->ptr += 4; + return GL_TRUE; +} - /* The default is now to use a local buffer and pass that to the - * kernel. This is also a fallback if allocation fails on the - * above path: - */ - if (!intel->alloc.ptr) { - intel->alloc.size = 8 * 1024; - intel->alloc.ptr = malloc( intel->alloc.size ); - intel->alloc.offset = 0; - } - assert(intel->alloc.ptr); + +void +intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, GLuint bytes, GLuint flags) +{ + assert((bytes & 3) == 0); + intel_batchbuffer_require_space(batch, bytes, flags); + __memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; } |