diff options
Diffstat (limited to 'src/mesa/drivers/dri/intel')
20 files changed, 730 insertions, 1409 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a594fb6cc4..9ad9f6a6c0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -29,6 +29,7 @@ #include "intel_ioctl.h" #include "intel_decode.h" #include "intel_reg.h" +#include "intel_bufmgr.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -78,11 +79,17 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } + if (!batch->buffer && intel->ttm == GL_TRUE) + batch->buffer = malloc (intel->maxBatchSize); + batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); - dri_bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; + intel->maxBatchSize, 4096); + if (batch->buffer) + batch->map = batch->buffer; + else { + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + } batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; @@ -99,7 +106,6 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; - batch->last_fence = NULL; intel_batchbuffer_reset(batch); return batch; @@ -108,14 +114,13 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->last_fence) { - dri_fence_wait(batch->last_fence); - dri_fence_unreference(batch->last_fence); - batch->last_fence = NULL; - } - if (batch->map) { - dri_bo_unmap(batch->buf); - batch->map = NULL; + if (batch->buffer) + free (batch->buffer); + else { + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } } dri_bo_unreference(batch->buf); batch->buf = NULL; @@ -131,11 +136,12 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - void *start; - GLuint count; + int ret = 0; - dri_bo_unmap(batch->buf); - start = dri_process_relocs(batch->buf, &count); + if (batch->buffer) + dri_bo_subdata (batch->buf, 0, used, batch->buffer); + else + dri_bo_unmap(batch->buf); batch->map = NULL; batch->ptr = NULL; @@ -148,21 +154,25 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { - intel_exec_ioctl(batch->intel, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock, - start, count, &batch->last_fence); + struct drm_i915_gem_execbuffer *execbuf; + + execbuf = dri_process_relocs(batch->buf); + ret = intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + execbuf); } else { - intel_batch_ioctl(batch->intel, - batch->buf->offset, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock); + dri_process_relocs(batch->buf); + ret = intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); } } - - dri_post_submit(batch->buf, &batch->last_fence); + + dri_post_submit(batch->buf); if (intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS) { @@ -187,6 +197,10 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.debug_batch(intel); } + if (ret != 0) { + UNLOCK_HARDWARE(intel); + exit(1); + } intel->vtbl.new_batch(intel); } @@ -204,21 +218,27 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = 0; - ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; - used += 12; + + /* Emit a flush if the bufmgr doesn't do it for us. */ + if (!intel->ttm) { + *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + batch->ptr += 4; + used = batch->ptr - batch->map; } - else { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; - used += 8; + + /* Round batchbuffer usage to 2 DWORDs. */ + + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; } + /* Mark the end of the buffer. */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a * flush. What happens is that when we try to grab the lock for @@ -230,6 +250,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, * avoid that in the first place. */ batch->ptr = batch->map; + if (intel->vtbl.finish_batch) + intel->vtbl.finish_batch(intel); + /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ @@ -242,9 +265,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { + int irq; + fprintf(stderr, "waiting for idle\n"); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); + LOCK_HARDWARE(intel); + irq = intelEmitIrqLocked(intel); + UNLOCK_HARDWARE(intel); + intelWaitIrq(intel, irq); } /* Reset the buffer: @@ -252,25 +279,22 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, intel_batchbuffer_reset(batch); } -void -intel_batchbuffer_finish(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_flush(batch); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); -} - /* This is the only way buffers get added to the validate list. */ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint delta) + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) { int ret; - ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain, + delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 2d636df2ce..d3c656c803 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -40,7 +40,8 @@ struct intel_batchbuffer struct intel_context *intel; dri_bo *buf; - dri_fence *last_fence; + + GLubyte *buffer; GLubyte *map; GLubyte *ptr; @@ -58,8 +59,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context void intel_batchbuffer_free(struct intel_batchbuffer *batch); -void intel_batchbuffer_finish(struct intel_batchbuffer *batch); - void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line); @@ -82,14 +81,16 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint offset); + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of * work... */ -static INLINE GLuint +static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); @@ -134,9 +135,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_RELOC(buf, cliprect_mode, delta) do { \ +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ } while (0) #define ADVANCE_BATCH() do { } while(0) diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index d9dbbb2482..80d11a01b7 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -66,14 +66,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, intelScreen = intel->intelScreen; - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. */ @@ -156,19 +148,26 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); + OUT_RELOC(dst->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(src->buffer, + I915_GEM_DOMAIN_RENDER, 0, + 0); ADVANCE_BATCH(); } - if (intel->first_swap_fence) - dri_fence_unreference(intel->first_swap_fence); + /* Emit a flush so that, on systems where we don't have automatic flushing + * set (such as 965), the results all land on the screen in a timely + * fashion. + */ + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + intel_batchbuffer_flush(intel->batch); - intel->first_swap_fence = intel->batch->last_fence; - if (intel->first_swap_fence) - dri_fence_reference(intel->first_swap_fence); } UNLOCK_HARDWARE(intel); @@ -225,7 +224,9 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); } @@ -342,11 +343,13 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, src_offset); ADVANCE_BATCH(); } @@ -359,14 +362,20 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } + BEGIN_BATCH(1, NO_LOOP_CLIPRECTS); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + intel_batchbuffer_flush(intel->batch); } @@ -538,7 +547,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(write_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -611,7 +621,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 951b8cbfb7..4227f0c973 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -45,8 +45,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, struct intel_buffer_object *intel_obj) { intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + intel_obj->Base.Size, 64); } /** diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 2a25f079e9..d5257ae27e 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -848,6 +848,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } + drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); + } else { /* XXX this shouldn't be an error but we can't handle it for now */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c deleted file mode 100644 index 194814e8fb..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ /dev/null @@ -1,1122 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - * Keith Whitwell <keithw-at-tungstengraphics-dot-com> - * Eric Anholt <eric@anholt.net> - * Dave Airlie <airlied@linux.ie> - */ - -#include <xf86drm.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> - -#include "errno.h" -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "string.h" -#include "imports.h" - -#include "i915_drm.h" - -#include "intel_bufmgr_ttm.h" -#ifdef TTM_API - -#define DBG(...) do { \ - if (bufmgr_ttm->bufmgr.debug) \ - fprintf(stderr, __VA_ARGS__); \ -} while (0) - -/* - * These bits are always specified in each validation - * request. Other bits are not supported at this point - * as it would require a bit of investigation to figure - * out what mask value should be used. - */ -#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ - DRM_BO_FLAG_READ | \ - DRM_BO_FLAG_WRITE | \ - DRM_BO_FLAG_EXE) - -struct intel_validate_entry { - dri_bo *bo; - struct drm_i915_op_arg bo_arg; -}; - -struct dri_ttm_bo_bucket_entry { - drmBO drm_bo; - struct dri_ttm_bo_bucket_entry *next; -}; - -struct dri_ttm_bo_bucket { - struct dri_ttm_bo_bucket_entry *head; - struct dri_ttm_bo_bucket_entry **tail; - /** - * Limit on the number of entries in this bucket. - * - * 0 means that this caching at this bucket size is disabled. - * -1 means that there is no limit to caching at this size. - */ - int max_entries; - int num_entries; -}; - -/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse - * is 1 << 16 pages, or 256MB. - */ -#define INTEL_TTM_BO_BUCKETS 16 -typedef struct _dri_bufmgr_ttm { - dri_bufmgr bufmgr; - - int fd; - unsigned int fence_type; - unsigned int fence_type_flush; - - uint32_t max_relocs; - - struct intel_validate_entry *validate_array; - int validate_array_size; - int validate_count; - - /** Array of lists of cached drmBOs of power-of-two sizes */ - struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; -} dri_bufmgr_ttm; - -/** - * Private information associated with a relocation that isn't already stored - * in the relocation buffer to be passed to the kernel. - */ -struct dri_ttm_reloc { - dri_bo *target_buf; - uint64_t validate_flags; - /** Offset of target_buf after last execution of this relocation entry. */ - unsigned int last_target_offset; -}; - -typedef struct _dri_bo_ttm { - dri_bo bo; - - int refcount; - unsigned int map_count; - drmBO drm_bo; - const char *name; - - uint64_t last_flags; - - /** - * Index of the buffer within the validation list while preparing a - * batchbuffer execution. - */ - int validate_index; - - /** DRM buffer object containing relocation list */ - uint32_t *reloc_buf_data; - struct dri_ttm_reloc *relocs; - - /** - * Indicates that the buffer may be shared with other processes, so we - * can't hold maps beyond when the user does. - */ - GLboolean shared; - - GLboolean delayed_unmap; - /* Virtual address from the dri_bo_map whose unmap was delayed. */ - void *saved_virtual; -} dri_bo_ttm; - -typedef struct _dri_fence_ttm -{ - dri_fence fence; - - int refcount; - const char *name; - drmFence drm_fence; -} dri_fence_ttm; - -static int -logbase2(int n) -{ - GLint i = 1; - GLint log2 = 0; - - while (n > i) { - i *= 2; - log2++; - } - - return log2; -} - -static struct dri_ttm_bo_bucket * -dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) -{ - int i; - - /* We only do buckets in power of two increments */ - if ((size & (size - 1)) != 0) - return NULL; - - /* We should only see sizes rounded to pages. */ - assert((size % 4096) == 0); - - /* We always allocate in units of pages */ - i = ffs(size / 4096) - 1; - if (i >= INTEL_TTM_BO_BUCKETS) - return NULL; - - return &bufmgr_ttm->cache_bucket[i]; -} - - -static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) -{ - int i, j; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - if (bo_ttm->reloc_buf_data != NULL) { - for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { - uint32_t *reloc_entry = bo_ttm->reloc_buf_data + - I915_RELOC_HEADER + - j * I915_RELOC0_STRIDE; - dri_bo *target_bo = bo_ttm->relocs[j].target_buf; - dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; - - DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", - i, - bo_ttm->name, reloc_entry[0], - target_ttm->name, target_bo->offset, - reloc_entry[1]); - } - } else { - DBG("%2d: %s\n", i, bo_ttm->name); - } - } -} - -/** - * Adds the given buffer to the list of buffers to be validated (moved into the - * appropriate memory type) with the next batch submission. - * - * If a buffer is validated multiple times in a batch submission, it ends up - * with the intersection of the memory type flags and the union of the - * access flags. - */ -static void -intel_add_validate_buffer(dri_bo *buf, - uint64_t flags) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, - * do that now. - */ - if (ttm_buf->delayed_unmap) { - drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - ttm_buf->delayed_unmap = GL_FALSE; - } - - if (ttm_buf->validate_index == -1) { - struct intel_validate_entry *entry; - struct drm_i915_op_arg *arg; - struct drm_bo_op_req *req; - int index; - - /* Extend the array of validation entries as necessary. */ - if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { - int i, new_size = bufmgr_ttm->validate_array_size * 2; - - if (new_size == 0) - new_size = 5; - - bufmgr_ttm->validate_array = - realloc(bufmgr_ttm->validate_array, - sizeof(struct intel_validate_entry) * new_size); - bufmgr_ttm->validate_array_size = new_size; - - /* Update pointers for realloced mem. */ - for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { - bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) - &bufmgr_ttm->validate_array[i + 1].bo_arg; - } - } - - /* Pick out the new array entry for ourselves */ - index = bufmgr_ttm->validate_count; - ttm_buf->validate_index = index; - entry = &bufmgr_ttm->validate_array[index]; - bufmgr_ttm->validate_count++; - - /* Fill in array entry */ - entry->bo = buf; - dri_bo_reference(buf); - - /* Fill in kernel arg */ - arg = &entry->bo_arg; - req = &arg->d.req; - - memset(arg, 0, sizeof(*arg)); - req->bo_req.handle = ttm_buf->drm_bo.handle; - req->op = drm_bo_validate; - req->bo_req.flags = flags; - req->bo_req.hint = 0; -#ifdef DRM_BO_HINT_PRESUMED_OFFSET - /* PRESUMED_OFFSET indicates that all relocations pointing at this - * buffer have the correct offset. If any of our relocations don't, - * this flag will be cleared off the buffer later in the relocation - * processing. - */ - req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; - req->bo_req.presumed_offset = buf->offset; -#endif - req->bo_req.mask = INTEL_BO_MASK; - req->bo_req.fence_class = 0; /* Backwards compat. */ - - if (ttm_buf->reloc_buf_data != NULL) - arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; - else - arg->reloc_ptr = 0; - - /* Hook up the linked list of args for the kernel */ - arg->next = 0; - if (index != 0) { - bufmgr_ttm->validate_array[index - 1].bo_arg.next = - (unsigned long)arg; - } - } else { - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[ttm_buf->validate_index]; - struct drm_i915_op_arg *arg = &entry->bo_arg; - struct drm_bo_op_req *req = &arg->d.req; - uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; - uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; - - /* Buffer was already in the validate list. Extend its flags as - * necessary. - */ - - if (memFlags == 0) { - fprintf(stderr, - "%s: No shared memory types between " - "0x%16llx and 0x%16llx\n", - __FUNCTION__, req->bo_req.flags, flags); - abort(); - } - if (flags & ~INTEL_BO_MASK) { - fprintf(stderr, - "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", - __FUNCTION__, flags & ~INTEL_BO_MASK); - abort(); - } - req->bo_req.flags = memFlags | modeFlags; - } -} - - -#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ - sizeof(uint32_t)) - -static int -intel_setup_reloc_list(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - - bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, - sizeof(struct dri_ttm_reloc)); - bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); - - /* Initialize the relocation list with the header: - * DWORD 0: relocation count - * DWORD 1: relocation type - * DWORD 2+3: handle to next relocation list (currently none) 64-bits - */ - bo_ttm->reloc_buf_data[0] = 0; - bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; - bo_ttm->reloc_buf_data[2] = 0; - bo_ttm->reloc_buf_data[3] = 0; - - return 0; -} - -#if 0 -int -driFenceSignaled(DriFenceObject * fence, unsigned type) -{ - int signaled; - int ret; - - if (fence == NULL) - return GL_TRUE; - - ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); - BM_CKFATAL(ret); - return signaled; -} -#endif - -static dri_bo * -dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - unsigned int pageSize = getpagesize(); - int ret; - uint64_t flags; - unsigned int hint; - unsigned long alloc_size; - struct dri_ttm_bo_bucket *bucket; - GLboolean alloc_from_cache = GL_FALSE; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - /* The mask argument doesn't do anything for us that we want other than - * determine which pool (TTM or local) the buffer is allocated into, so - * just pass all of the allocation class flags. - */ - flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_EXE; - /* No hints we want to use. */ - hint = 0; - - /* Round the allocated size up to a power of two number of pages. */ - alloc_size = 1 << logbase2(size); - if (alloc_size < pageSize) - alloc_size = pageSize; - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL || bucket->max_entries == 0) - alloc_size = size; - - /* Get a buffer out of the cache if available */ - if (bucket != NULL && bucket->num_entries > 0) { - struct dri_ttm_bo_bucket_entry *entry = bucket->head; - int busy; - - /* Check if the buffer is still in flight. If not, reuse it. */ - ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); - alloc_from_cache = (ret == 0 && busy == 0); - - if (alloc_from_cache) { - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - ttm_buf->drm_bo = entry->drm_bo; - free(entry); - } - } - - if (!alloc_from_cache) { - ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, - NULL, flags, hint, &ttm_buf->drm_bo); - if (ret != 0) { - free(ttm_buf); - return NULL; - } - } - - ttm_buf->bo.size = size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_FALSE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); - - return &ttm_buf->bo; -} - -/* Our TTM backend doesn't allow creation of static buffers, as that requires - * privelege for the non-fake case, and the lock in the fake case where we were - * working around the X Server not creating buffers and passing handles to us. - */ -static dri_bo * -dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, void *virtual, - uint64_t location_mask) -{ - return NULL; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_bo * -intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - int ret; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(-ret)); - free(ttm_buf); - return NULL; - } - ttm_buf->bo.size = ttm_buf->drm_bo.size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_TRUE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create_from_handle: %p %08x (%s)\n", - &ttm_buf->bo, handle, ttm_buf->name); - - return &ttm_buf->bo; -} - -static void -dri_ttm_bo_reference(dri_bo *buf) -{ - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - ttm_buf->refcount++; -} - -static void -dri_ttm_bo_unreference(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - if (!buf) - return; - - if (--ttm_buf->refcount == 0) { - struct dri_ttm_bo_bucket *bucket; - int ret; - - assert(ttm_buf->map_count == 0); - - if (ttm_buf->reloc_buf_data) { - int i; - - /* Unreference all the target buffers */ - for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) - dri_bo_unreference(ttm_buf->relocs[i].target_buf); - free(ttm_buf->relocs); - - /* Free the kernel BO containing relocation entries */ - free(ttm_buf->reloc_buf_data); - ttm_buf->reloc_buf_data = NULL; - } - - if (ttm_buf->delayed_unmap) { - int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - } - - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (!ttm_buf->shared && - bucket != NULL && - (bucket->max_entries == -1 || - (bucket->max_entries > 0 && - bucket->num_entries < bucket->max_entries))) - { - struct dri_ttm_bo_bucket_entry *entry; - - entry = calloc(1, sizeof(*entry)); - entry->drm_bo = ttm_buf->drm_bo; - - entry->next = NULL; - *bucket->tail = entry; - bucket->tail = &entry->next; - bucket->num_entries++; - } else { - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed (%s): %s\n", - ttm_buf->name, strerror(-ret)); - } - } - - DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - free(buf); - return; - } -} - -static int -dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - uint64_t flags; - int ret; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - flags = DRM_BO_FLAG_READ; - if (write_enable) - flags |= DRM_BO_FLAG_WRITE; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops. - */ - if (ttm_buf->map_count++ != 0) - return 0; - - assert(buf->virtual == NULL); - - DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - /* XXX: What about if we're upgrading from READ to WRITE? */ - if (ttm_buf->delayed_unmap) { - buf->virtual = ttm_buf->saved_virtual; - return 0; - } - - ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -static int -dri_ttm_bo_unmap(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - int ret; - - if (buf == NULL) - return 0; - - assert(ttm_buf->map_count != 0); - if (--ttm_buf->map_count != 0) - return 0; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - assert(buf->virtual != NULL); - - DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - if (!ttm_buf->shared) { - ttm_buf->saved_virtual = buf->virtual; - ttm_buf->delayed_unmap = GL_TRUE; - buf->virtual = NULL; - - return 0; - } - - buf->virtual = NULL; - - ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_fence * -intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_fence_ttm *ttm_fence; - - ttm_fence = malloc(sizeof(*ttm_fence)); - if (!ttm_fence) - return NULL; - - ttm_fence->drm_fence.handle = arg->handle; - ttm_fence->drm_fence.fence_class = arg->fence_class; - ttm_fence->drm_fence.type = arg->type; - ttm_fence->drm_fence.flags = arg->flags; - ttm_fence->drm_fence.signaled = 0; - ttm_fence->drm_fence.sequence = arg->sequence; - - ttm_fence->fence.bufmgr = bufmgr; - ttm_fence->name = name; - ttm_fence->refcount = 1; - - DBG("fence_create_from_handle: %p (%s)\n", - &ttm_fence->fence, ttm_fence->name); - - return &ttm_fence->fence; -} - - -static void -dri_ttm_fence_reference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - ++fence_ttm->refcount; - DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_ttm_fence_unreference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - if (!fence) - return; - - DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); - - if (--fence_ttm->refcount == 0) { - int ret; - - ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); - if (ret != 0) { - fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", - fence_ttm->name, strerror(-ret)); - } - - free(fence); - return; - } -} - -static void -dri_ttm_fence_wait(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - int ret; - - ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", - __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); - abort(); - } - - DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - free(bufmgr_ttm->validate_array); - - /* Free any cached buffer objects we were going to reuse */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; - struct dri_ttm_bo_bucket_entry *entry; - - while ((entry = bucket->head) != NULL) { - int ret; - - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed: %s\n", - strerror(-ret)); - } - - free(entry); - } - } - - free(bufmgr); -} - -/** - * Adds the target buffer to the validation list and adds the relocation - * to the reloc_buffer's relocation list. - * - * The relocation entry at the given offset must already contain the - * precomputed relocation value, because the kernel will optimize out - * the relocation entry write when the buffer hasn't moved from the - * last known offset in target_buf. - */ -static int -dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; - dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; - int num_relocs; - uint32_t *this_reloc; - - /* Create a new relocation list if needed */ - if (reloc_buf_ttm->reloc_buf_data == NULL) - intel_setup_reloc_list(reloc_buf); - - num_relocs = reloc_buf_ttm->reloc_buf_data[0]; - - /* Check overflow */ - assert(num_relocs < bufmgr_ttm->max_relocs); - - this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + - num_relocs * I915_RELOC0_STRIDE; - - this_reloc[0] = offset; - this_reloc[1] = delta; - this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ - this_reloc[3] = 0; - - reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; - reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; - dri_bo_reference(target_buf); - - reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ - /* Check wraparound */ - assert(reloc_buf_ttm->reloc_buf_data[0] != 0); - return 0; -} - -/** - * Walk the tree of relocations rooted at BO and accumulate the list of - * validations to be performed and update the relocation buffers with - * index values into the validation list. - */ -static void -dri_ttm_bo_process_reloc(dri_bo *bo) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_process_reloc(r->target_buf); - - /* Add the target to the validate list */ - intel_add_validate_buffer(r->target_buf, r->validate_flags); - - /* Clear the PRESUMED_OFFSET flag from the validate list entry of the - * target if this buffer has a stale relocated pointer at it. - */ - if (r->last_target_offset != r->target_buf->offset) { - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; - - entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; - } - } -} - -static void * -dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - - /* Update indices and set up the validate list. */ - dri_ttm_bo_process_reloc(batch_buf); - - /* Add the batch buffer to the validation list. There are no relocations - * pointing to it. - */ - intel_add_validate_buffer(batch_buf, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); - - *count = bufmgr_ttm->validate_count; - return &bufmgr_ttm->validate_array[0].bo_arg; -} - -static const char * -intel_get_flags_mem_type_string(uint64_t flags) -{ - switch (flags & DRM_BO_MASK_MEM) { - case DRM_BO_FLAG_MEM_LOCAL: return "local"; - case DRM_BO_FLAG_MEM_TT: return "ttm"; - case DRM_BO_FLAG_MEM_VRAM: return "vram"; - case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; - case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; - case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; - case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; - case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; - default: return NULL; - } -} - -static const char * -intel_get_flags_caching_string(uint64_t flags) -{ - switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { - case 0: return "UU"; - case DRM_BO_FLAG_CACHED: return "CU"; - case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; - case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; - default: return NULL; - } -} - -static void -intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) -{ - int i; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; - struct drm_bo_arg_rep *rep = &arg->d.rep; - - /* Update the flags */ - if (rep->bo_info.flags != bo_ttm->last_flags) { - DBG("BO %s migrated: %s/%s -> %s/%s\n", - bo_ttm->name, - intel_get_flags_mem_type_string(bo_ttm->last_flags), - intel_get_flags_caching_string(bo_ttm->last_flags), - intel_get_flags_mem_type_string(rep->bo_info.flags), - intel_get_flags_caching_string(rep->bo_info.flags)); - - bo_ttm->last_flags = rep->bo_info.flags; - } - /* Update the buffer offset */ - if (rep->bo_info.offset != bo->offset) { - DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", - bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); - bo->offset = rep->bo_info.offset; - } - } -} - -/** - * Update the last target offset field of relocation entries for PRESUMED_OFFSET - * computation. - */ -static void -dri_ttm_bo_post_submit(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_post_submit(r->target_buf); - - r->last_target_offset = r->target_buf->offset; - } -} - -static void -dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - int i; - - intel_update_buffer_offsets (bufmgr_ttm); - - dri_ttm_bo_post_submit(batch_buf); - - if (bufmgr_ttm->bufmgr.debug) - dri_ttm_dump_validation_list(bufmgr_ttm); - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - /* Disconnect the buffer from the validate list */ - bo_ttm->validate_index = -1; - dri_bo_unreference(bo); - bufmgr_ttm->validate_array[i].bo = NULL; - } - bufmgr_ttm->validate_count = 0; -} - -/** - * Enables unlimited caching of buffer objects for reuse. - * - * This is potentially very memory expensive, as the cache at each bucket - * size is only bounded by how many buffers of that size we've managed to have - * in flight at once. - */ -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - bufmgr_ttm->cache_bucket[i].max_entries = -1; - } -} - -/* - * - */ -static int -dri_ttm_check_aperture_space(dri_bo *bo) -{ - return 0; -} - -/** - * Initializes the TTM buffer manager, which uses the kernel to allocate, map, - * and manage map buffer objections. - * - * \param fd File descriptor of the opened DRM device. - * \param fence_type Driver-specific fence type used for fences with no flush. - * \param fence_type_flush Driver-specific fence type used for fences with a - * flush. - */ -dri_bufmgr * -intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size) -{ - dri_bufmgr_ttm *bufmgr_ttm; - int i; - - bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); - bufmgr_ttm->fd = fd; - bufmgr_ttm->fence_type = fence_type; - bufmgr_ttm->fence_type_flush = fence_type_flush; - - /* Let's go with one relocation per every 2 dwords (but round down a bit - * since a power of two will mean an extra page allocation for the reloc - * buffer). - * - * Every 4 was too few for the blender benchmark. - */ - bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - - bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; - bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; - bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; - bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; - bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; - bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; - bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; - bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; - bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; - bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; - bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; - bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; - bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; - bufmgr_ttm->bufmgr.debug = GL_FALSE; - bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; - /* Initialize the linked lists for BO reuse cache. */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) - bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; - - return &bufmgr_ttm->bufmgr; -} -#else -dri_bufmgr * -intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size) -{ - return NULL; -} - -dri_bo * -intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - return NULL; -} - -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) -{ -} -#endif diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h deleted file mode 100644 index f5bd64c90f..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h +++ /dev/null @@ -1,28 +0,0 @@ - -#ifndef INTEL_BUFMGR_TTM_H -#define INTEL_BUFMGR_TTM_H - -#include "dri_bufmgr.h" - -extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle); - -#ifdef TTM_API -dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg); -#endif - - -dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size); - -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); - -#ifndef TTM_API -#define DRM_I915_FENCE_CLASS_ACCEL 0 -#define DRM_I915_FENCE_TYPE_RW 2 -#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000 -#endif - -#endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 671b3f68a3..6d7d6811ac 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,7 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -367,20 +367,26 @@ intelFlush(GLcontext * ctx) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); - - /* XXX: Need to do an MI_FLUSH here. - */ } void intelFinish(GLcontext * ctx) { - struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + int i; + intelFlush(ctx); - if (intel->batch->last_fence) { - dri_fence_wait(intel->batch->last_fence); - dri_fence_unreference(intel->batch->last_fence); - intel->batch->last_fence = NULL; + + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb; + + irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb->region) + dri_bo_wait_rendering(irb->region->buffer); + } + if (fb->_DepthBuffer) { + /* XXX: Wait on buffer idle */ } } @@ -446,28 +452,25 @@ static GLboolean intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; - GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; - GLboolean ttm_supported; + GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; + GLboolean gem_supported; - /* If we've got a new enough DDX that's initializing TTM and giving us + /* If we've got a new enough DDX that's initializing GEM and giving us * object handles for the shared buffers, use that. */ intel->ttm = GL_FALSE; if (intel->intelScreen->driScrnPriv->dri2.enabled) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && intel->intelScreen->drmMinor >= 11 && intel->intelScreen->front.bo_handle != -1) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else - ttm_supported = GL_FALSE; + gem_supported = GL_FALSE; - if (!ttm_disable && ttm_supported) { + if (!gem_disable && gem_supported) { int bo_reuse_mode; - intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, - DRM_FENCE_TYPE_EXE, - DRM_FENCE_TYPE_EXE | - DRM_I915_FENCE_TYPE_RW, + intel->bufmgr = intel_bufmgr_gem_init(intel->driFd, BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; @@ -477,16 +480,16 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_ttm_enable_bo_reuse(intel->bufmgr); + intel_bufmgr_gem_enable_reuse(intel->bufmgr); break; } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { - if (ttm_disable) { - fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); + if (gem_disable) { + fprintf(stderr, "GEM disabled. Using classic.\n"); } else { - fprintf(stderr, "Failed to initialize TTM buffer manager. " + fprintf(stderr, "Failed to initialize GEM. " "Falling back to classic.\n"); } @@ -496,14 +499,17 @@ intel_init_bufmgr(struct intel_context *intel) return GL_FALSE; } - intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - intel_fence_emit, - intel_fence_wait, - intel); + intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); } + /* XXX bufmgr should be per-screen, not per-context */ + intelScreen->ttm = intel->ttm; + return GL_TRUE; } @@ -671,8 +677,6 @@ intelInitContext(struct intel_context *intel, intel_recreate_static_regions(intel); intel->batch = intel_batchbuffer_alloc(intel); - intel->last_swap_fence = NULL; - intel->first_swap_fence = NULL; intel_bufferobj_init(intel); intel_fbo_init(intel); @@ -690,7 +694,6 @@ intelInitContext(struct intel_context *intel, /* Force all software fallbacks */ if (driQueryOptionb(&intel->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D rasterization\n"); - FALLBACK(intel, INTEL_FALLBACK_USER, 1); intel->no_rast = 1; } @@ -725,17 +728,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); - - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - if (intel->first_swap_fence) { - dri_fence_wait(intel->first_swap_fence); - dri_fence_unreference(intel->first_swap_fence); - intel->first_swap_fence = NULL; - } + free(intel->prim.vb); if (release_texture_heaps) { /* This share group is about to go away, free our private @@ -887,7 +880,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags) */ if (!intel->ttm && sarea->texAge != intel->hHWContext) { sarea->texAge = intel->hHWContext; - dri_bufmgr_fake_contended_lock_take(intel->bufmgr); + intel_bufmgr_fake_contended_lock_take(intel->bufmgr); if (INTEL_DEBUG & DEBUG_BATCH) intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index df79ab8897..f1116d2747 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -35,6 +35,7 @@ #include "mm.h" #include "texmem.h" #include "dri_bufmgr.h" +#include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" @@ -85,6 +86,7 @@ struct intel_context { void (*destroy) (struct intel_context * intel); void (*emit_state) (struct intel_context * intel); + void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -174,9 +176,6 @@ struct intel_context */ GLboolean ttm; - dri_fence *last_swap_fence; - dri_fence *first_swap_fence; - struct intel_batchbuffer *batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -184,9 +183,13 @@ struct intel_context struct { GLuint id; - GLuint primitive; - GLubyte *start_ptr; + uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); + dri_bo *vb_bo; + uint8_t *vb; + unsigned int start_offset; /**< Byte offset of primitive sequence */ + unsigned int current_offset; /**< Byte offset of next vertex */ + unsigned int count; /**< Number of vertices in current primitive */ } prim; GLuint stats_wm; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index b3f6610546..bc0b579429 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -295,7 +295,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Height = height; /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); + /* XXX can we choose a different tile here? */ + intel_set_span_functions(&irb->Base, INTEL_TILE_NONE); return GL_TRUE; } @@ -375,7 +376,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, * not a user-created renderbuffer. */ struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat) +intel_create_renderbuffer(GLenum intFormat, int tiling) { GET_CURRENT_CONTEXT(ctx); @@ -442,12 +443,14 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.InternalFormat = intFormat; + irb->tiling = tiling; + /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); + intel_set_span_functions(&irb->Base, tiling); return irb; } @@ -519,7 +522,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage) { if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; @@ -558,7 +561,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - intel_set_span_functions(&irb->Base); + intel_set_span_functions(&irb->Base, irb->tiling); irb->RenderToTexture = GL_TRUE; @@ -586,6 +589,9 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) _mesa_init_renderbuffer(&irb->Base, name); irb->Base.ClassID = INTEL_RB_CLASS; + /* XXX can we fix this? */ + irb->tiling = INTEL_TILE_NONE; + if (!intel_update_wrapper(ctx, irb, texImage)) { _mesa_free(irb); return NULL; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index c90c84b48c..9e085a1992 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -72,6 +72,7 @@ struct intel_renderbuffer struct intel_region *region; void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ + int tiling; GLboolean RenderToTexture; /* RTT? */ GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ @@ -90,7 +91,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb, struct intel_region *region); extern struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat); +intel_create_renderbuffer(GLenum intFormat, int tiling); extern void intel_fbo_init(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index f4566ba89c..c0a18fa225 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -30,6 +30,8 @@ #include <unistd.h> #include <errno.h> #include <sched.h> +#include <sys/types.h> +#include <sys/ioctl.h> #include "mtypes.h" #include "context.h" @@ -43,7 +45,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL @@ -104,7 +106,7 @@ intelWaitIrq(struct intel_context *intel, int seq) } -void +int intel_batch_ioctl(struct intel_context *intel, GLuint start_offset, GLuint used, @@ -113,7 +115,7 @@ intel_batch_ioctl(struct intel_context *intel, struct drm_i915_batchbuffer batch; if (intel->no_hw) - return; + return 0; assert(intel->locked); assert(used); @@ -142,82 +144,53 @@ intel_batch_ioctl(struct intel_context *intel, if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + return -errno; } + + return 0; } #ifdef TTM_API -void +int intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) + struct drm_i915_gem_execbuffer *execbuf) { - struct drm_i915_execbuffer execbuf; - dri_fence *fo; int ret; assert(intel->locked); assert(used); if (intel->no_hw) - return; - - if (*fence) { - dri_fence_unreference(*fence); - } + return 0; - memset(&execbuf, 0, sizeof(execbuf)); - - execbuf.num_buffers = count; - execbuf.batch.used = used; - execbuf.batch.cliprects = intel->pClipRects; - execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf.batch.DR1 = 0; - execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); - - execbuf.ops_list = (unsigned long)start; // TODO - execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + execbuf->batch_start_offset = 0; + execbuf->batch_len = used; + execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; + execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf->DR1 = 0; + execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); do { - ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, - sizeof(execbuf)); + ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf); } while (ret == -EAGAIN); if (ret != 0) { - fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); + return -errno; } - if (execbuf.fence_arg.error != 0) { - - /* - * Fence creation has failed, but the GPU has been - * idled by the kernel. Safe to continue. - */ - - *fence = NULL; - return; - } - - fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", - &execbuf.fence_arg); - if (!fo) { - fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); - UNLOCK_HARDWARE(intel); - exit(1); - } - *fence = fo; + return 0; } #else -void -intel_exec_ioctl(struct intel_context *intel, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) +int +int intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + struct drm_i915_gem_execbuffer *execbuf); { + return -EINVAL; } #endif diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 8674aef723..526e38358c 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -33,14 +33,14 @@ void intelWaitIrq( struct intel_context *intel, int seq ); int intelEmitIrqLocked( struct intel_context *intel ); -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used, - GLboolean ignore_cliprects, - GLboolean allow_unlock ); -void intel_exec_ioctl(struct intel_context *intel, +int intel_batch_ioctl(struct intel_context *intel, + GLuint start_offset, GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence); + GLboolean ignore_cliprects, + GLboolean allow_unlock); +int intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + struct drm_i915_gem_execbuffer *execbuf); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 34813d2aa0..569e992b5e 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -81,7 +81,8 @@ do_texture_drawpixels(GLcontext * ctx, else { /* PBO only for now: */ -/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } @@ -218,7 +219,6 @@ do_blit_drawpixels(GLcontext * ctx, struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; - dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -323,16 +323,9 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.LogicOp : GL_COPY); } intel_batchbuffer_flush(intel->batch); - fence = intel->batch->last_fence; - dri_fence_reference(fence); } UNLOCK_HARDWARE(intel); - if (fence) { - dri_fence_wait(fence); - dri_fence_unreference(fence); - } - if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 37629c07e2..96af7e1a03 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -31,11 +31,140 @@ #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) +#define FLUSH_MAP_CACHE (1 << 0) +#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) + /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) #define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +/** @{ + * 915 definitions + */ +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) +/** @} */ + +/** @{ + * 830 definitions + */ +#define S0_VB_OFFSET_MASK_830 0xffffff8 +#define S0_VB_PITCH_SHIFT_830 1 +#define S0_VB_ENABLE_830 0 +/** @} */ + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) +#define S2_TEX_COUNT_SHIFT_830 12 +#define S2_VERTEX_0_WIDTH_SHIFT_830 0 +#define S2_VERTEX_1_WIDTH_SHIFT_830 6 +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 8bc548913f..c7e2c551dd 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -44,7 +44,7 @@ #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -106,10 +106,7 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + pitch * cpp * height, 64); return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); } @@ -121,7 +118,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, { dri_bo *buffer; - buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); + buffer = intel_bo_gem_create_from_name(intel->bufmgr, "region", handle); return intel_region_alloc_internal(intel, cpp, pitch, height, tiled, buffer); @@ -355,10 +352,7 @@ intel_region_release_pbo(struct intel_context *intel, region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, - 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + 64); } /* Break the COW tie to the pbo. Both the pbo and the region end up @@ -440,17 +434,16 @@ intel_recreate_static(struct intel_context *intel, if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, - name, - region_desc->bo_handle); + region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, + name, + region_desc->bo_handle); } else { - region->buffer = dri_bo_alloc_static(intel->bufmgr, - name, - region_desc->offset, - intelScreen->pitch * - intelScreen->height, - region_desc->map, - DRM_BO_FLAG_MEM_TT); + region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, + name, + region_desc->offset, + intelScreen->pitch * + intelScreen->height, + region_desc->map); } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 5233e58fc9..8fd503ee8b 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] = /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL */ - DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") DRI_CONF_ENUM(0, "Disable buffer object reuse") DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") @@ -221,16 +221,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea) sarea->height); fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); + (unsigned) sarea->front_handle, sarea->front_tiled); fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + (unsigned) sarea->back_handle, sarea->back_tiled); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); + (unsigned) sarea->depth_handle, sarea->depth_tiled); fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); } @@ -531,20 +531,23 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, /* setup the hardware-based renderbuffers */ { - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->front.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, &intel_fb->color_rb[0]->Base); } if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->back.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); if (screen->third.handle) { struct gl_renderbuffer *tmp_rb = NULL; - intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->third.tiled : INTEL_TILE_NONE); _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); } } @@ -553,7 +556,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, if (mesaVis->stencilBits == 8) { /* combined depth/stencil buffer */ struct intel_renderbuffer *depthStencilRb - = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT); + = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); /* note: bind RB to two attachment points */ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthStencilRb->Base); @@ -561,7 +565,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT24); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT24, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -569,7 +574,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index e62b2d7c89..9a73b13951 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -74,6 +74,8 @@ typedef struct int irq_active; int allow_batchbuffer; + int ttm; + /** * Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index df4f5927a0..c6778b16ff 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -40,6 +40,137 @@ #include "swrast/swrast.h" /* + * Deal with tiled surfaces + */ + +#if 0 +/* These are pre-965 tile swizzling functions -- power of two widths */ +static uintptr_t x_tile_swizzle_pow2 (uintptr_t addr, int n) +{ + uintptr_t a = addr; + uintptr_t base_mask = (((~0) << (n + 4)) | 0xff); + uintptr_t x_mask = ((~0) << 12) & ~base_mask; + + a = ((a & base_mask) | + ((a >> (n-8)) & 0x7) | + ((a << 3) & x_mask)); + _mesa_printf ("x_swizzle %08x (base %x yrow %x tile#x %x xsword %x byte %x) %08x\n", + addr, + addr >> (n + 4), + (addr >> (n + 1)) & 0x7, + (addr >> 9) & ((1 << (n-8)) - 1), + (addr >> 5) & 0xf, + (addr & 0x1f), + a); + return a; +} + +static uintptr_t y_tile_swizzle_pow2 (uintptr_t addr, int n) +{ + uintptr_t a = (uintptr_t) addr; + uintptr_t base_mask = (((~0) << (n + 6)) | 0xf); + uintptr_t x_mask = ((~0) << 9) & ~base_mask; + + a = ((a & base_mask) | + ((a >> (n-3)) & 0x1f) | + ((a << 5) & x_mask)); + _mesa_printf ("y_swizzle %08x (base %x yrow %x tile#x %x xoword %x byte %x) %08x\n", + addr, + addr >> (n + 6), + (addr >> (n + 1)) & 0x01f, + (addr >> 7) & ((1 << (n-6)) - 1), + (addr >> 4) & 0x7, + (addr & 0xf), + a); + return a; +} +#endif + +static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, + int x, int y) +{ + GLubyte *buf = (GLubyte *) irb->pfMap; + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x1ff; + y_tile_off = y & 7; + +#ifndef I915 + /* The documentation says that X tile layout is arranged in 8 512-byte + * lines of pixel data. However, that doesn't appear to be the case + * on GM965, tested by drawing a 128x8 quad in no_rast mode. For lines + * 1,2,4, and 7 of each tile, each consecutive pair of 64-byte spans + * has the locations of those spans swapped. + */ + switch (y_tile_off) { + case 1: + case 2: + case 4: + case 7: + x_tile_off ^= 64; + break; + default: + break; + } +#endif + + x_tile_number = xbyte >> 9; + y_tile_number = y >> 3; + + tile_off = (y_tile_off << 9) + x_tile_off; + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + +#if 0 + printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", + x, y, tile_off, tile_base, + tile_off + tile_base, + irb->pfPitch, tile_stride); +#endif + + return buf + tile_base + tile_off; +} + +static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, + int x, int y) +{ + GLubyte *buf = (GLubyte *) irb->pfMap; + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x7f; + y_tile_off = y & 0x1f; + + x_tile_number = xbyte >> 7; + y_tile_number = y >> 5; + + tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf); + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + + return buf + tile_base + tile_off; +} + +/* break intelWriteRGBASpan_ARGB8888 */ @@ -55,7 +186,7 @@ + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ assert(irb->pfMap);\ - (void) p; + (void) p; (void) buf; /* XXX FBO: this is identical to the macro in spantmp2.h except we get * the cliprect info from the context, not the driDrawable. @@ -69,12 +200,14 @@ int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ int maxy = intel->pClipRects[_nc].y2 - intel->drawY; - - - + +#if 0 + }} +#endif #define Y_FLIP(_y) ((_y) * yScale + yBias) +/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -99,6 +232,43 @@ #define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) #include "spantmp2.h" +/* 16 bit RGB565 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_XTile_##x##_RGB565 +#define TAG2(x,y) intel_XTile_##x##_RGB565##y +#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_YTile_##x##_RGB565 +#define TAG2(x,y) intel_YTile_##x##_RGB565##y +#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +/* 32 bit ARGB888 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_XTile_##x##_ARGB8888 +#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y +#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_YTile_##x##_ARGB8888 +#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y +#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ @@ -107,7 +277,7 @@ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ - (intel->drawY * pitch + intel->drawX) * irb->region->cpp; + (intel->drawY * pitch + intel->drawX) * irb->region->cpp; (void) buf; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -127,6 +297,33 @@ /** + ** 16-bit x tile depthbuffer functions. + **/ +#define WRITE_DEPTH( _x, _y, d ) \ + (*((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) = d) + +#define READ_DEPTH( d, _x, _y ) \ + d = *((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) + + +#define TAG(x) intel_XTile_##x##_z16 +#include "depthtmp.h" + +/** + ** 16-bit y tile depthbuffer functions. + **/ +#define WRITE_DEPTH( _x, _y, d ) \ + (*((GLushort *)y_tile_swizzle (irb, intel, _x, _y)) = d) + +#define READ_DEPTH( d, _x, _y ) \ + (d = *((GLushort *)y_tile_swizzle (irb, intel, _x, _y))) + + +#define TAG(x) intel_YTile_##x##_z16 +#include "depthtmp.h" + + +/** ** 24/8-bit interleaved depth/stencil functions ** Note: we're actually reading back combined depth+stencil values. ** The wrappers in main/depthstencil.c are used to extract the depth @@ -149,6 +346,49 @@ /** + ** 24/8-bit x-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)) = tmp; \ +} + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "depthtmp.h" + +/** + ** 24/8-bit y-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)) = tmp; \ +} + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "depthtmp.h" + + +/** ** 8-bit stencil function (XXX FBO: This is obsolete) **/ #define WRITE_STENCIL( _x, _y, d ) { \ @@ -164,6 +404,40 @@ #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" +/** + ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint *a = (GLuint *) x_tile_swizzle (irb, intel, _x, _y); \ + GLuint tmp = *a; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + *a = tmp; \ +} + +#define READ_STENCIL( d, _x, _y ) \ + (d = *((GLuint*) x_tile_swizzle (irb, intel, _x, _y)) >> 24) + +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "stenciltmp.h" + +/** + ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint *a = (GLuint *) y_tile_swizzle (irb, intel, _x, _y); \ + GLuint tmp = *a; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + *a = tmp; \ +} + +#define READ_STENCIL( d, _x, _y ) \ + (d = *((GLuint*) y_tile_swizzle (irb, intel, _x, _y)) >> 24) + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "stenciltmp.h" + /** @@ -379,25 +653,80 @@ intelInitSpanFuncs(GLcontext * ctx) * These are used for the software fallbacks. */ void -intel_set_span_functions(struct gl_renderbuffer *rb) +intel_set_span_functions(struct gl_renderbuffer *rb, int tiling) { if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ - intelInitPointers_RGB565(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitPointers_RGB565(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitPointers_RGB565(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitPointers_RGB565(rb); + break; + } } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ - intelInitPointers_ARGB8888(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitDepthPointers_z16(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitDepthPointers_z16(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitDepthPointers_z16(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - intelInitDepthPointers_z24_s8(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ - intelInitStencilPointers_z24_s8(rb); + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } else { _mesa_problem(NULL, diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index 5201f6d6c6..c56e5e1611 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -33,6 +33,10 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -extern void intel_set_span_functions(struct gl_renderbuffer *rb); +extern void intel_set_span_functions(struct gl_renderbuffer *rb, int tiling); + +#define INTEL_TILE_NONE 0 +#define INTEL_TILE_X 1 +#define INTEL_TILE_Y 2 #endif |