From b2f1aa2389473ed09170713301b042661d70a48e Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 27 Sep 2007 10:16:04 -0700
Subject: WIP 965 conversion to dri_bufmgr.

---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 331 +++++++++++++++++++++++++
 src/mesa/drivers/dri/intel/intel_batchbuffer.h | 122 +++++++++
 2 files changed, 453 insertions(+)
 create mode 100644 src/mesa/drivers/dri/intel/intel_batchbuffer.c
 create mode 100644 src/mesa/drivers/dri/intel/intel_batchbuffer.h

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
new file mode 100644
index 0000000000..8ee48b5a68
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -0,0 +1,331 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "intel_batchbuffer.h"
+#include "intel_ioctl.h"
+#include "intel_decode.h"
+#include "i915_debug.h"
+
+/* Relocations in kernel space:
+ *    - pass dma buffer separately
+ *    - memory manager knows how to patch
+ *    - pass list of dependent buffers
+ *    - pass relocation list
+ *
+ * Either:
+ *    - get back an offset for buffer to fire
+ *    - memory manager knows how to fire buffer
+ *
+ * Really want the buffer to be AGP and pinned.
+ */
+
+/* Cliprect fence: The highest fence protecting a dma buffer
+ * containing explicit cliprect information.  Like the old drawable
+ * lock but irq-driven.  The X server must wait for this fence to
+ * expire before changing cliprects [and then doing sw rendering?].
+ * For other dma buffers, the scheduler will grab current cliprect
+ * info and mix into buffer.  X server must hold the lock while
+ * changing cliprects???  Make per-drawable.  Need cliprects in shared
+ * memory -- beats storing them with every cmd buffer in the queue.
+ *
+ * ==> X server must wait for this fence to expire before touching the
+ * framebuffer with new cliprects.
+ *
+ * ==> Cliprect-dependent buffers associated with a
+ * cliprect-timestamp.  All of the buffers associated with a timestamp
+ * must go to hardware before any buffer with a newer timestamp.
+ *
+ * ==> Dma should be queued per-drawable for correct X/GL
+ * synchronization.  Or can fences be used for this?
+ *
+ * Applies to: Blit operations, metaops, X server operations -- X
+ * server automatically waits on its own dma to complete before
+ * modifying cliprects???
+ */
+
+void
+intel_batchbuffer_reset(struct intel_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;
+
+   if (batch->buf != NULL) {
+      dri_bo_unreference(batch->buf);
+      batch->buf = NULL;
+   }
+
+   batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
+                             intel->intelScreen->maxBatchSize, 4096,
+                             DRM_BO_FLAG_MEM_TT);
+   dri_bo_map(batch->buf, GL_TRUE);
+   batch->map = batch->buf->virtual;
+   batch->size = intel->intelScreen->maxBatchSize;
+   batch->ptr = batch->map;
+}
+
+struct intel_batchbuffer *
+intel_batchbuffer_alloc(struct intel_context *intel)
+{
+   struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
+
+   batch->intel = intel;
+   batch->last_fence = NULL;
+   intel_batchbuffer_reset(batch);
+
+   return batch;
+}
+
+void
+intel_batchbuffer_free(struct intel_batchbuffer *batch)
+{
+   if (batch->last_fence) {
+      dri_fence_wait(batch->last_fence);
+      dri_fence_unreference(batch->last_fence);
+      batch->last_fence = NULL;
+   }
+   if (batch->map) {
+      dri_bo_unmap(batch->buf);
+      batch->map = NULL;
+   }
+   dri_bo_unreference(batch->buf);
+   batch->buf = NULL;
+   free(batch);
+}
+
+static int
+relocation_sort(const void *a_in, const void *b_in)
+{
+   const struct buffer_reloc *a = a_in, *b = b_in;
+
+   /* Return 0 for matching buffers so the comparator is consistent and
+    * all relocations against one buffer end up adjacent after the sort.
+    */
+   if (a->buf == b->buf)
+      return 0;
+   return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1;
+}
+
+/* TODO: Push this whole function into bufmgr.
+ */
+static void
+do_flush_locked(struct intel_batchbuffer *batch,
+                GLuint used,
+                GLboolean ignore_cliprects, GLboolean allow_unlock)
+{
+   GLuint *ptr;
+   GLuint i;
+   struct intel_context *intel = batch->intel;
+   dri_fence *fo;
+   GLboolean performed_rendering = GL_FALSE;
+
+   assert(batch->buf->virtual != NULL);
+   ptr = batch->buf->virtual;
+
+   /* Sort our relocation list by referenced buffer pointer.  This lets
+    * us validate each buffer exactly once, with the union of all the
+    * flags requested for it, while avoiding O(n^2) behavior in the
+    * number of relocations.
+    */
+   qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]),
+         relocation_sort);
+
+   /* Perform the necessary validations of buffers, and enter the
+    * relocations in the batchbuffer.
+    */
+   for (i = 0; i < batch->nr_relocs; i++) {
+      struct buffer_reloc *r = &batch->reloc[i];
+
+      if (r->validate_flags & DRM_BO_FLAG_WRITE)
+         performed_rendering = GL_TRUE;
+
+      /* If this is the first time we've seen this buffer in the
+       * relocation list, figure out our flags and validate it.
+       */
+      if (i == 0 || batch->reloc[i - 1].buf != r->buf) {
+         uint32_t validate_flags;
+         int j, ret;
+
+         /* Accumulate the flags we need for validating this buffer. */
+         validate_flags = r->validate_flags;
+         for (j = i + 1; j < batch->nr_relocs; j++) {
+            if (batch->reloc[j].buf != r->buf)
+               break;
+            validate_flags |= batch->reloc[j].validate_flags;
+         }
+
+         /* Validate.  If we fail, fence to clear the unfenced list and
+          * bail out.
+          */
+         ret = dri_bo_validate(r->buf, validate_flags);
+         if (ret != 0) {
+            dri_bo_unmap(batch->buf);
+            fo = dri_fence_validated(intel->intelScreen->bufmgr,
+                                     "batchbuffer failure fence", GL_TRUE);
+            dri_fence_unreference(fo);
+            goto done;
+         }
+      }
+      ptr[r->offset / 4] = r->buf->offset + r->delta;
+      dri_bo_unreference(r->buf);
+   }
+
+   dri_bo_unmap(batch->buf);
+   batch->map = NULL;
+   batch->ptr = NULL;
+
+   dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
+
+   batch->list_count = 0;
+   batch->nr_relocs = 0;
+   batch->flags = 0;
+
+   /* Throw away non-effective packets.  Won't work once we have
+    * hardware contexts which would preserve state changes beyond a
+    * single buffer.
+    */
+   if (intel->numClipRects != 0 || ignore_cliprects) {
+      intel_batch_ioctl(batch->intel,
+                        batch->buf->offset,
+                        used, ignore_cliprects, allow_unlock);
+   }
+
+   /* Associate a fence with the validated buffers, and note that we
+    * included a flush at the end.
+    */
+   fo = dri_fence_validated(intel->intelScreen->bufmgr,
+                            "Batch fence", GL_TRUE);
+
+   if (performed_rendering) {
+      dri_fence_unreference(batch->last_fence);
+      batch->last_fence = fo;
+   } else {
+      /* If we didn't validate any buffers for writing by the card, we
+       * don't need to track the fence for glFinish().
+       */
+      dri_fence_unreference(fo);
+   }
+
+   if (intel->numClipRects == 0 && !ignore_cliprects) {
+      if (allow_unlock) {
+         /* If we are not doing any actual user-visible rendering,
+          * do a sched_yield to keep the app from pegging the cpu while
+          * achieving nothing.
+          */
+         UNLOCK_HARDWARE(intel);
+         sched_yield();
+         LOCK_HARDWARE(intel);
+      }
+      intel->vtbl.lost_hardware(intel);
+   }
+
+done:
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      dri_bo_map(batch->buf, GL_FALSE);
+      /* Re-fetch the map: the buffer may come back at a new address. */
+      ptr = batch->buf->virtual;
+      intel_decode(ptr, used / 4, batch->buf->offset);
+      dri_bo_unmap(batch->buf);
+   }
+}
+
+void
+intel_batchbuffer_flush(struct intel_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;
+   GLboolean was_locked = intel->locked;
+
+   if (used == 0)
+      return;
+
+   /* Add an MI_FLUSH followed by the MI_BATCH_BUFFER_END, inserting a
+    * noop dword when needed to keep the batch length a multiple of 8
+    * bytes.  Always adding the MI_FLUSH is a performance drain that we
+    * would like to avoid.
+    */
+   if (used & 4) {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = 0;
+      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+      used += 12;
+   }
+   else {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+      used += 8;
+   }
+
+   /* TODO: Just pass the relocation list and dma buffer up to the
+    * kernel.
+    */
+   if (!was_locked)
+      LOCK_HARDWARE(intel);
+
+   do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
+                   GL_FALSE);
+
+   if (!was_locked)
+      UNLOCK_HARDWARE(intel);
+
+   /* Reset the buffer:
+    */
+   intel_batchbuffer_reset(batch);
+}
+
+void
+intel_batchbuffer_finish(struct intel_batchbuffer *batch)
+{
+   intel_batchbuffer_flush(batch);
+   if (batch->last_fence != NULL)
+      dri_fence_wait(batch->last_fence);
+}
+
+/* This is the only way buffers get added to the validate list.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                             dri_bo *buffer,
+                             GLuint flags, GLuint delta)
+{
+   struct buffer_reloc *r;
+
+   /* Check for a free slot before claiming it, not after. */
+   assert(batch->nr_relocs < MAX_RELOCS);
+   r = &batch->reloc[batch->nr_relocs++];
+
+   dri_bo_reference(buffer);
+   r->buf = buffer;
+   r->offset = batch->ptr - batch->map;
+   r->delta = delta;
+   r->validate_flags = flags;
+
+   batch->ptr += 4;
+   return GL_TRUE;
+}
+
+void
+intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                       const void *data, GLuint bytes, GLuint flags)
+{
+   assert((bytes & 3) == 0);
+   intel_batchbuffer_require_space(batch, bytes, flags);
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
new file mode 100644
index 0000000000..850a91e1c9
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -0,0 +1,122 @@
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "mtypes.h"
+#include "dri_bufmgr.h"
+
+struct intel_context;
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+#define MAX_RELOCS 4096
+
+#define INTEL_BATCH_NO_CLIPRECTS 0x1
+#define INTEL_BATCH_CLIPRECTS    0x2
+
+struct buffer_reloc
+{
+   dri_bo *buf;
+   GLuint offset;
+   GLuint delta;                /* not needed? */
+   GLuint validate_flags;
+};
+
+struct intel_batchbuffer
+{
+   struct intel_context *intel;
+
+   dri_bo *buf;
+   dri_fence *last_fence;
+   GLuint flags;
+
+   drmBOList list;
+   GLuint list_count;
+   GLubyte *map;
+   GLubyte *ptr;
+
+   struct buffer_reloc reloc[MAX_RELOCS];
+   GLuint nr_relocs;
+   GLuint size;
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context *intel);
+
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+
+void intel_batchbuffer_finish(struct intel_batchbuffer *batch);
+
+void intel_batchbuffer_flush(struct intel_batchbuffer *batch);
+
+void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_batchbuffer_emit_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                            const void *data, GLuint bytes, GLuint flags);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+                                     GLuint bytes);
+
+GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                                       dri_bo *buffer,
+                                       GLuint flags, GLuint delta);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLuint
+intel_batchbuffer_space(struct intel_batchbuffer *batch)
+{
+   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+static INLINE void
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+{
+   assert(batch->map);
+   assert(intel_batchbuffer_space(batch) >= 4);
+   *(GLuint *) (batch->ptr) = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+                                GLuint sz, GLuint flags)
+{
+   assert(sz < batch->size - 8);
+   if (intel_batchbuffer_space(batch) < sz ||
+       (batch->flags != 0 && flags != 0 && batch->flags != flags))
+      intel_batchbuffer_flush(batch);
+
+   batch->flags |= flags;
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n, flags) do {                                \
+   assert(!intel->prim.flush);                                    \
+   intel_batchbuffer_require_space(intel->batch, (n)*4, flags);   \
+} while (0)
+
+#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+
+#define OUT_RELOC(buf, flags, delta) do {                         \
+   assert((delta) >= 0);                                          \
+   intel_batchbuffer_emit_reloc(intel->batch, buf, flags, delta); \
+} while (0)
+
+#define ADVANCE_BATCH() do { } while (0)
+
+#endif
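
A note on usage: callers bracket each command packet with BEGIN_BATCH()/
ADVANCE_BATCH(), emit dwords with OUT_BATCH(), and use OUT_RELOC() for any
dword that holds a buffer address, so that do_flush_locked() can validate
the buffer and patch its final offset into the batch.  The sketch below is
illustrative only, not part of this patch: the blit opcode define and the
dword layout are assumptions, and "intel" is assumed to be a live
struct intel_context * whose batchbuffer has already been allocated.

/* Hypothetical caller (not part of this patch): fill a rectangle in a
 * destination buffer through the new batchbuffer interface.
 */
static void
example_fill_rect(struct intel_context *intel, dri_bo *dst, GLuint dst_pitch,
                  GLuint x1, GLuint y1, GLuint x2, GLuint y2, GLuint color)
{
   BATCH_LOCALS;

   /* Reserve 6 dwords, flushing first if the current batch is too full
    * or was built with conflicting cliprect flags.
    */
   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH(EXAMPLE_FILL_BLT_CMD);      /* assumed opcode #define */
   OUT_BATCH(dst_pitch | (0xf0 << 16));  /* pitch; 0xf0 = pattern-copy ROP */
   OUT_BATCH((y1 << 16) | x1);           /* top left */
   OUT_BATCH((y2 << 16) | x2);           /* bottom right, exclusive */
   /* The destination address is unknown until validation, so record a
    * relocation; do_flush_locked() rewrites this dword with
    * dst->offset + 0 once dri_bo_validate() has placed the buffer.
    */
   OUT_RELOC(dst, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
   OUT_BATCH(color);
   ADVANCE_BATCH();
}

A glFinish()-style wait then maps onto intel_batchbuffer_finish(), which
flushes any pending commands and blocks on last_fence.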