diff options
Diffstat (limited to 'src')
76 files changed, 4275 insertions, 1736 deletions
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 072d16ce8b..95eeecc7c4 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -10,6 +10,11 @@ COMMON_SOURCES = \ ../common/xmlconfig.c \ ../common/drirenderbuffer.c +COMMON_BM_SOURCES = \ + ../common/dri_bufmgr.c \ + ../common/dri_bufmgr_fake.c + + ifeq ($(WINDOW_SYSTEM),dri) WINOBJ= WINLIB= diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c new file mode 100644 index 0000000000..8b5cb7628e --- /dev/null +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "mtypes.h" +#include "dri_bufmgr.h" + +/** @file dri_bufmgr.c + * + * Convenience functions for buffer management methods. + */ + +dri_bo * +dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size, + unsigned int alignment, uint64_t location_mask) +{ + return bufmgr->bo_alloc(bufmgr, name, size, alignment, location_mask); +} + +dri_bo * +dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, unsigned long offset, + unsigned long size, void *virtual, + uint64_t location_mask) +{ + return bufmgr->bo_alloc_static(bufmgr, name, offset, size, virtual, + location_mask); +} + +void +dri_bo_reference(dri_bo *bo) +{ + bo->bufmgr->bo_reference(bo); +} + +void +dri_bo_unreference(dri_bo *bo) +{ + if (bo == NULL) + return; + + bo->bufmgr->bo_unreference(bo); +} + +int +dri_bo_map(dri_bo *buf, GLboolean write_enable) +{ + return buf->bufmgr->bo_map(buf, write_enable); +} + +int +dri_bo_unmap(dri_bo *buf) +{ + return buf->bufmgr->bo_unmap(buf); +} + +void +dri_fence_wait(dri_fence *fence) +{ + fence->bufmgr->fence_wait(fence); +} + +void +dri_fence_reference(dri_fence *fence) +{ + fence->bufmgr->fence_reference(fence); +} + +void +dri_fence_unreference(dri_fence *fence) +{ + if (fence == NULL) + return; + + fence->bufmgr->fence_unreference(fence); +} + +void +dri_bo_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + if (size == 0 || data == NULL) + return; + + dri_bo_map(bo, GL_TRUE); + memcpy((unsigned char *)bo->virtual + offset, data, size); + dri_bo_unmap(bo); +} + +void +dri_bo_get_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + if (size == 0 || data == NULL) + return; + + dri_bo_map(bo, GL_FALSE); + memcpy(data, (unsigned char *)bo->virtual + offset, size); + dri_bo_unmap(bo); +} + +void +dri_bufmgr_destroy(dri_bufmgr *bufmgr) +{ + bufmgr->destroy(bufmgr); +} + + +int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf) +{ + return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); +} + +void *dri_process_relocs(dri_bo *batch_buf, GLuint *count) +{ + return batch_buf->bufmgr->process_relocs(batch_buf, count); +} + +void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + batch_buf->bufmgr->post_submit(batch_buf, last_fence); +} + +void +dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug) +{ + bufmgr->debug = enable_debug; +} + +int +dri_bufmgr_check_aperture_space(dri_bo *bo) +{ + return bo->bufmgr->check_aperture_space(bo); +} diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h new file mode 100644 index 0000000000..0a726dc108 --- /dev/null +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -0,0 +1,260 @@ +/************************************************************************** + * + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + */ + +#ifndef _DRI_BUFMGR_H_ +#define _DRI_BUFMGR_H_ +#include <xf86drm.h> + +typedef struct _dri_bufmgr dri_bufmgr; +typedef struct _dri_bo dri_bo; +typedef struct _dri_fence dri_fence; + +struct _dri_bo { + /** Size in bytes of the buffer object. */ + unsigned long size; + /** + * Card virtual address (offset from the beginning of the aperture) for the + * object. Only valid while validated. + */ + unsigned long offset; + /** + * Virtual address for accessing the buffer data. Only valid while mapped. + */ + void *virtual; + /** Buffer manager context associated with this buffer object */ + dri_bufmgr *bufmgr; +}; + +struct _dri_fence { + /** + * This is an ORed mask of DRM_BO_FLAG_READ, DRM_BO_FLAG_WRITE, and + * DRM_FLAG_EXE indicating the operations associated with this fence. + * + * It is constant for the life of the fence object. + */ + unsigned int type; + /** Buffer manager context associated with this fence */ + dri_bufmgr *bufmgr; +}; + +/** + * Context for a buffer manager instance. + * + * Contains public methods followed by private storage for the buffer manager. + */ +struct _dri_bufmgr { + /** + * Allocate a buffer object. + * + * Buffer objects are not necessarily initially mapped into CPU virtual + * address space or graphics device aperture. They must be mapped using + * bo_map() to be used by the CPU, and validated for use using bo_validate() + * to be used from the graphics device. + */ + dri_bo *(*bo_alloc)(dri_bufmgr *bufmgr_ctx, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask); + + /** + * Allocates a buffer object for a static allocation. + * + * Static allocations are ones such as the front buffer that are offered by + * the X Server, which are never evicted and never moved. + */ + dri_bo *(*bo_alloc_static)(dri_bufmgr *bufmgr_ctx, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask); + + /** Takes a reference on a buffer object */ + void (*bo_reference)(dri_bo *bo); + + /** + * Releases a reference on a buffer object, freeing the data if + * rerefences remain. + */ + void (*bo_unreference)(dri_bo *bo); + + /** + * Maps the buffer into userspace. + * + * This function will block waiting for any existing fence on the buffer to + * clear, first. The resulting mapping is available at buf->virtual. +\ */ + int (*bo_map)(dri_bo *buf, GLboolean write_enable); + + /** Reduces the refcount on the userspace mapping of the buffer object. */ + int (*bo_unmap)(dri_bo *buf); + + /** Takes a reference on a fence object */ + void (*fence_reference)(dri_fence *fence); + + /** + * Releases a reference on a fence object, freeing the data if + * rerefences remain. + */ + void (*fence_unreference)(dri_fence *fence); + + /** + * Blocks until the given fence is signaled. + */ + void (*fence_wait)(dri_fence *fence); + + /** + * Tears down the buffer manager instance. + */ + void (*destroy)(dri_bufmgr *bufmgr); + + /** + * Add relocation entry in reloc_buf, which will be updated with the + * target buffer's real offset on on command submission. + * + * Relocations remain in place for the lifetime of the buffer object. + * + * \param reloc_buf Buffer to write the relocation into. + * \param flags BO flags to be used in validating the target buffer. + * Applicable flags include: + * - DRM_BO_FLAG_READ: The buffer will be read in the process of + * command execution. + * - DRM_BO_FLAG_WRITE: The buffer will be written in the process of + * command execution. + * - DRM_BO_FLAG_MEM_TT: The buffer should be validated in TT memory. + * - DRM_BO_FLAG_MEM_VRAM: The buffer should be validated in video + * memory. + * \param delta Constant value to be added to the relocation target's offset. + * \param offset Byte offset within batch_buf of the relocated pointer. + * \param target Buffer whose offset should be written into the relocation + * entry. + */ + int (*emit_reloc)(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target); + + /** + * Processes the relocations, either in userland or by converting the list + * for use in batchbuffer submission. + * + * Kernel-based implementations will return a pointer to the arguments + * to be handed with batchbuffer submission to the kernel. The userland + * implementation performs the buffer validation and emits relocations + * into them the appopriate order. + * + * \param batch_buf buffer at the root of the tree of relocations + * \param count returns the number of buffers validated. + * \return relocation record for use in command submission. + * */ + void *(*process_relocs)(dri_bo *batch_buf, GLuint *count); + + void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); + + int (*check_aperture_space)(dri_bo *bo); + GLboolean debug; /**< Enables verbose debugging printouts */ +}; + +dri_bo *dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size, + unsigned int alignment, uint64_t location_mask); +dri_bo *dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask); +void dri_bo_reference(dri_bo *bo); +void dri_bo_unreference(dri_bo *bo); +int dri_bo_map(dri_bo *buf, GLboolean write_enable); +int dri_bo_unmap(dri_bo *buf); +void dri_fence_wait(dri_fence *fence); +void dri_fence_reference(dri_fence *fence); +void dri_fence_unreference(dri_fence *fence); + +void dri_bo_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, const void *data); +void dri_bo_get_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, void *data); + +void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr); +dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, + unsigned int cookie), + void *driver_priv); +void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr); +void dri_bufmgr_destroy(dri_bufmgr *bufmgr); + +int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf); +void *dri_process_relocs(dri_bo *batch_buf, uint32_t *count); +void dri_post_process_relocs(dri_bo *batch_buf); +void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); +int dri_bufmgr_check_aperture_space(dri_bo *bo); + +#ifndef TTM_API +/* reuse some TTM API */ + +#define DRM_BO_MEM_LOCAL 0 +#define DRM_BO_MEM_TT 1 +#define DRM_BO_MEM_VRAM 2 +#define DRM_BO_MEM_PRIV0 3 +#define DRM_BO_MEM_PRIV1 4 +#define DRM_BO_MEM_PRIV2 5 +#define DRM_BO_MEM_PRIV3 6 +#define DRM_BO_MEM_PRIV4 7 + +#define DRM_BO_FLAG_READ (1ULL << 0) +#define DRM_BO_FLAG_WRITE (1ULL << 1) +#define DRM_BO_FLAG_EXE (1ULL << 2) +#define DRM_BO_MASK_ACCESS (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_EXE) +#define DRM_BO_FLAG_NO_EVICT (1ULL << 4) + +#define DRM_BO_FLAG_MAPPABLE (1ULL << 5) +#define DRM_BO_FLAG_SHAREABLE (1ULL << 6) + +#define DRM_BO_FLAG_CACHED (1ULL << 7) + +#define DRM_BO_FLAG_NO_MOVE (1ULL << 8) +#define DRM_BO_FLAG_CACHED_MAPPED (1ULL << 19) +#define DRM_BO_FLAG_FORCE_CACHING (1ULL << 13) +#define DRM_BO_FLAG_FORCE_MAPPABLE (1ULL << 14) +#define DRM_BO_FLAG_TILE (1ULL << 15) + +#define DRM_BO_FLAG_MEM_LOCAL (1ULL << 24) +#define DRM_BO_FLAG_MEM_TT (1ULL << 25) +#define DRM_BO_FLAG_MEM_VRAM (1ULL << 26) + +#define DRM_BO_MASK_MEM 0x00000000FF000000ULL + +#define DRM_FENCE_TYPE_EXE 0x00000001 +#endif + +#endif diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c new file mode 100644 index 0000000000..9bf3f3437c --- /dev/null +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -0,0 +1,1245 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Originally a fake version of the buffer manager so that we can + * prototype the changes in a driver fairly quickly, has been fleshed + * out to a fully functional interim solution. + * + * Basically wraps the old style memory management in the new + * programming interface, but is more expressive and avoids many of + * the bugs in the old texture manager. + */ +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "drm.h" + +#include "simple_list.h" +#include "mm.h" +#include "imports.h" + +#define DBG(...) do { \ + if (bufmgr_fake->bufmgr.debug) \ + _mesa_printf(__VA_ARGS__); \ +} while (0) + +/* Internal flags: + */ +#define BM_NO_BACKING_STORE 0x00000001 +#define BM_NO_FENCE_SUBDATA 0x00000002 +#define BM_PINNED 0x00000004 + +/* Wrapper around mm.c's mem_block, which understands that you must + * wait for fences to expire before memory can be freed. This is + * specific to our use of memcpy for uploads - an upload that was + * processed through the command queue wouldn't need to care about + * fences. + */ +#define MAX_RELOCS 4096 + +struct fake_buffer_reloc +{ + /** Buffer object that the relocation points at. */ + dri_bo *target_buf; + /** Offset of the relocation entry within reloc_buf. */ + GLuint offset; + /** Cached value of the offset when we last performed this relocation. */ + GLuint last_target_offset; + /** Value added to target_buf's offset to get the relocation entry. */ + GLuint delta; + /** Flags to validate the target buffer under. */ + uint64_t validate_flags; +}; + +struct block { + struct block *next, *prev; + struct mem_block *mem; /* BM_MEM_AGP */ + + /** + * Marks that the block is currently in the aperture and has yet to be + * fenced. + */ + unsigned on_hardware:1; + /** + * Marks that the block is currently fenced (being used by rendering) and + * can't be freed until @fence is passed. + */ + unsigned fenced:1; + + /** Fence cookie for the block. */ + unsigned fence; /* Split to read_fence, write_fence */ + + dri_bo *bo; + void *virtual; +}; + +typedef struct _bufmgr_fake { + dri_bufmgr bufmgr; + + unsigned long low_offset; + unsigned long size; + void *virtual; + + struct mem_block *heap; + struct block lru; /* only allocated, non-fence-pending blocks here */ + + unsigned buf_nr; /* for generating ids */ + + struct block on_hardware; /* after bmValidateBuffers */ + struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ + /* then to bufmgr->lru or free() */ + + unsigned int last_fence; + + unsigned fail:1; + unsigned need_fence:1; + GLboolean thrashing; + + /** + * Driver callback to emit a fence, returning the cookie. + * + * Currently, this also requires that a write flush be emitted before + * emitting the fence, but this should change. + */ + unsigned int (*fence_emit)(void *private); + /** Driver callback to wait for a fence cookie to have passed. */ + int (*fence_wait)(void *private, unsigned int fence_cookie); + /** Driver-supplied argument to driver callbacks */ + void *driver_priv; + + GLboolean debug; + + GLboolean performed_rendering; + + /* keep track of the current total size of objects we have relocs for */ + unsigned long current_total_size; +} dri_bufmgr_fake; + +typedef struct _dri_bo_fake { + dri_bo bo; + + unsigned id; /* debug only */ + const char *name; + + unsigned dirty:1; + unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ + unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ + unsigned int refcount; + /* Flags may consist of any of the DRM_BO flags, plus + * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two + * driver private flags. + */ + uint64_t flags; + unsigned int alignment; + GLboolean is_static, validated; + unsigned int map_count; + + /* Flags for the buffer to be validated with in command submission */ + uint64_t validate_flags; + + /** relocation list */ + struct fake_buffer_reloc *relocs; + GLuint nr_relocs; + + struct block *block; + void *backing_store; + void (*invalidate_cb)(dri_bo *bo, void *ptr); + void *invalidate_ptr; +} dri_bo_fake; + +typedef struct _dri_fence_fake { + dri_fence fence; + + const char *name; + unsigned int refcount; + unsigned int fence_cookie; + GLboolean flushed; +} dri_fence_fake; + +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie); + +static int dri_fake_check_aperture_space(dri_bo *bo); + +#define MAXFENCE 0x7fffffff + +static GLboolean FENCE_LTE( unsigned a, unsigned b ) +{ + if (a == b) + return GL_TRUE; + + if (a < b && b - a < (1<<24)) + return GL_TRUE; + + if (a > b && MAXFENCE - a + b < (1<<24)) + return GL_TRUE; + + return GL_FALSE; +} + +static unsigned int +_fence_emit_internal(dri_bufmgr_fake *bufmgr_fake) +{ + bufmgr_fake->last_fence = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + return bufmgr_fake->last_fence; +} + +static void +_fence_wait_internal(dri_bufmgr_fake *bufmgr_fake, unsigned int cookie) +{ + int ret; + + ret = bufmgr_fake->fence_wait(bufmgr_fake->driver_priv, cookie); + if (ret != 0) { + _mesa_printf("%s:%d: Error %d waiting for fence.\n", + __FILE__, __LINE__); + abort(); + } + clear_fenced(bufmgr_fake, cookie); +} + +static GLboolean +_fence_test(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + /* Slight problem with wrap-around: + */ + return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); +} + +/** + * Allocate a memory manager block for the buffer. + */ +static GLboolean +alloc_block(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + dri_bufmgr_fake *bufmgr_fake= (dri_bufmgr_fake *)bo->bufmgr; + struct block *block = (struct block *)calloc(sizeof *block, 1); + unsigned int align_log2 = _mesa_ffs(bo_fake->alignment) - 1; + GLuint sz; + + if (!block) + return GL_FALSE; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); + if (!block->mem) { + free(block); + return GL_FALSE; + } + + make_empty_list(block); + + /* Insert at head or at tail??? + */ + insert_at_tail(&bufmgr_fake->lru, block); + + block->virtual = bufmgr_fake->virtual + + block->mem->ofs - bufmgr_fake->low_offset; + block->bo = bo; + + bo_fake->block = block; + + return GL_TRUE; +} + +/* Release the card storage associated with buf: + */ +static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) +{ + dri_bo_fake *bo_fake; + DBG("free block %p %08x %d %d\n", block, block->mem->ofs, block->on_hardware, block->fenced); + + if (!block) + return; + + bo_fake = (dri_bo_fake *)block->bo; + if (!(bo_fake->flags & BM_NO_BACKING_STORE) && (bo_fake->card_dirty == 1)) { + memcpy(bo_fake->backing_store, block->virtual, block->bo->size); + bo_fake->card_dirty = 1; + bo_fake->dirty = 1; + } + + if (block->on_hardware) { + block->bo = NULL; + } + else if (block->fenced) { + block->bo = NULL; + } + else { + DBG(" - free immediately\n"); + remove_from_list(block); + + mmFreeMem(block->mem); + free(block); + } +} + +static void +alloc_backing_store(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + assert(!bo_fake->backing_store); + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + + bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); + + DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); + assert(bo_fake->backing_store); +} + +static void +free_backing_store(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) { + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + ALIGN_FREE(bo_fake->backing_store); + bo_fake->backing_store = NULL; + } +} + +static void +set_dirty(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) + bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); + + assert(!(bo_fake->flags & BM_PINNED)); + + DBG("set_dirty - buf %d\n", bo_fake->id); + bo_fake->dirty = 1; +} + +static GLboolean +evict_lru(dri_bufmgr_fake *bufmgr_fake, GLuint max_fence) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence)) + return 0; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +#define foreach_s_rev(ptr, t, list) \ + for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) + +static GLboolean +evict_mru(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s_rev(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +/** + * Removes all objects from the fenced list older than the given fence. + */ +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie) +{ + struct block *block, *tmp; + int ret = 0; + + foreach_s(block, tmp, &bufmgr_fake->fenced) { + assert(block->fenced); + + if (_fence_test(bufmgr_fake, block->fence)) { + + block->fenced = 0; + + if (!block->bo) { + DBG("delayed free: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + remove_from_list(block); + mmFreeMem(block->mem); + free(block); + } + else { + DBG("return to lru: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + move_to_tail(&bufmgr_fake->lru, block); + } + + ret = 1; + } + else { + /* Blocks are ordered by fence, so if one fails, all from + * here will fail also: + */ + DBG("fence not passed: offset %x sz %x %d %d \n", + block->mem->ofs, block->mem->size, block->fence, bufmgr_fake->last_fence); + break; + } + } + + DBG("%s: %d\n", __FUNCTION__, ret); + return ret; +} + +static void fence_blocks(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + struct block *block, *tmp; + + foreach_s (block, tmp, &bufmgr_fake->on_hardware) { + DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block, + block->mem->size, block->mem->ofs, block->bo, fence); + block->fence = fence; + + block->on_hardware = 0; + block->fenced = 1; + + /* Move to tail of pending list here + */ + move_to_tail(&bufmgr_fake->fenced, block); + } + + assert(is_empty_list(&bufmgr_fake->on_hardware)); +} + +static GLboolean evict_and_alloc_block(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + assert(bo_fake->block == NULL); + + /* Search for already free memory: + */ + if (alloc_block(bo)) + return GL_TRUE; + + /* If we're not thrashing, allow lru eviction to dig deeper into + * recently used textures. We'll probably be thrashing soon: + */ + if (!bufmgr_fake->thrashing) { + while (evict_lru(bufmgr_fake, 0)) + if (alloc_block(bo)) + return GL_TRUE; + } + + /* Keep thrashing counter alive? + */ + if (bufmgr_fake->thrashing) + bufmgr_fake->thrashing = 20; + + /* Wait on any already pending fences - here we are waiting for any + * freed memory that has been submitted to hardware and fenced to + * become available: + */ + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + + if (alloc_block(bo)) + return GL_TRUE; + } + + if (!is_empty_list(&bufmgr_fake->on_hardware)) { + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + } + + if (!bufmgr_fake->thrashing) { + DBG("thrashing\n"); + } + bufmgr_fake->thrashing = 20; + + if (alloc_block(bo)) + return GL_TRUE; + } + + while (evict_mru(bufmgr_fake)) + if (alloc_block(bo)) + return GL_TRUE; + + DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); + + return GL_FALSE; +} + +/*********************************************************************** + * Public functions + */ + +/** + * Wait for hardware idle by emitting a fence and waiting for it. + */ +static void +dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) +{ + unsigned int cookie; + + cookie = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + _fence_wait_internal(bufmgr_fake, cookie); +} + +/** + * Wait for execution pending on a buffer + */ +static void +dri_bufmgr_fake_bo_wait_idle(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->block == NULL || !bo_fake->block->fenced) + return; + + _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); +} + +/* Specifically ignore texture memory sharing. + * -- just evict everything + * -- and wait for idle + */ +void +dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + struct block *block, *tmp; + + bufmgr_fake->need_fence = 1; + bufmgr_fake->fail = 0; + + /* Wait for hardware idle. We don't know where acceleration has been + * happening, so we'll need to wait anyway before letting anything get + * put on the card again. + */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* Check that we hadn't released the lock without having fenced the last + * set of buffers. + */ + assert(is_empty_list(&bufmgr_fake->fenced)); + assert(is_empty_list(&bufmgr_fake->on_hardware)); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + assert(_fence_test(bufmgr_fake, block->fence)); + set_dirty(block->bo); + } +} + +static dri_bo * +dri_fake_bo_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = -1; + bo_fake->bo.virtual = NULL; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + + /* Alignment must be a power of two */ + assert((alignment & (alignment - 1)) == 0); + if (alignment == 0) + alignment = 1; + bo_fake->alignment = alignment; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = 0; + bo_fake->is_static = GL_FALSE; + + DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static dri_bo * +dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = offset; + bo_fake->bo.virtual = virtual; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = BM_PINNED | DRM_BO_FLAG_NO_MOVE; + bo_fake->is_static = GL_TRUE; + + DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static void +dri_fake_bo_reference(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + bo_fake->refcount++; +} + +static void +dri_fake_bo_unreference(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + if (!bo) + return; + + if (--bo_fake->refcount == 0) { + assert(bo_fake->map_count == 0); + /* No remaining references, so free it */ + if (bo_fake->block) + free_block(bufmgr_fake, bo_fake->block); + free_backing_store(bo); + + for (i = 0; i < bo_fake->nr_relocs; i++) + dri_bo_unreference(bo_fake->relocs[i].target_buf); + + DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id, bo_fake->name); + + free(bo_fake->relocs); + free(bo); + + return; + } +} + +/** + * Set the buffer as not requiring backing store, and instead get the callback + * invoked whenever it would be set dirty. + */ +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) + free_backing_store(bo); + + bo_fake->flags |= BM_NO_BACKING_STORE; + + DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); + bo_fake->dirty = 1; + bo_fake->invalidate_cb = invalidate_cb; + bo_fake->invalidate_ptr = ptr; + + /* Note that it is invalid right from the start. Also note + * invalidate_cb is called with the bufmgr locked, so cannot + * itself make bufmgr calls. + */ + if (invalidate_cb != NULL) + invalidate_cb(bo, ptr); +} + +/** + * Map a buffer into bo->virtual, allocating either card memory space (If + * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. + */ +static int +dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops, and it is used internally in bufmgr_fake + * for relocation. + */ + if (bo_fake->map_count++ != 0) + return 0; + + { + DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + if (bo->virtual != NULL) { + _mesa_printf("%s: already mapped\n", __FUNCTION__); + abort(); + } + else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) { + + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + DBG("%s: alloc failed\n", __FUNCTION__); + bufmgr_fake->fail = 1; + return 1; + } + else { + assert(bo_fake->block); + bo_fake->dirty = 0; + + if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && + bo_fake->block->fenced) { + dri_bufmgr_fake_bo_wait_idle(bo); + } + + bo->virtual = bo_fake->block->virtual; + } + } + else { + if (write_enable) + set_dirty(bo); + + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo->virtual = bo_fake->backing_store; + } + } + + return 0; +} + +static int +dri_fake_bo_unmap(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + assert(bo_fake->map_count != 0); + if (--bo_fake->map_count != 0) + return 0; + + DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + bo->virtual = NULL; + + return 0; +} + +static void +dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + bufmgr_fake->performed_rendering = GL_FALSE; + /* okay for ever BO that is on the HW kick it off. + seriously not afraid of the POLICE right now */ + foreach_s(block, tmp, &bufmgr_fake->on_hardware) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + block->on_hardware = 0; + free_block(bufmgr_fake, block); + bo_fake->block = NULL; + bo_fake->validated = GL_FALSE; + if (!(bo_fake->flags & BM_NO_BACKING_STORE)) + bo_fake->dirty = 1; + } +} + +static int +dri_fake_bo_validate(dri_bo *bo, uint64_t flags) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* XXX: Sanity-check whether we've already validated this one under + * different flags. See drmAddValidateItem(). + */ + bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + + DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + /* Sanity check: Buffers should be unmapped before being validated. + * This is not so much of a problem for bufmgr_fake, but TTM refuses, + * and the problem is harder to debug there. + */ + assert(bo_fake->map_count == 0); + + if (bo_fake->is_static) { + /* Add it to the needs-fence list */ + bufmgr_fake->need_fence = 1; + return 0; + } + + /* reset size accounted */ + bo_fake->size_accounted = 0; + + /* Allocate the card memory */ + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + bufmgr_fake->fail = 1; + DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name); + return -1; + } + + assert(bo_fake->block); + assert(bo_fake->block->bo == &bo_fake->bo); + + bo->offset = bo_fake->block->mem->ofs; + + /* Upload the buffer contents if necessary */ + if (bo_fake->dirty) { + DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id, + bo_fake->name, bo->size, bo_fake->block->mem->ofs); + + assert(!(bo_fake->flags & + (BM_NO_BACKING_STORE|BM_PINNED))); + + /* Actually, should be able to just wait for a fence on the memory, + * which we would be tracking when we free it. Waiting for idle is + * a sufficiently large hammer for now. + */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* we may never have mapped this BO so it might not have any backing + * store if this happens it should be rare, but 0 the card memory + * in any case */ + if (bo_fake->backing_store) + memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); + else + memset(bo_fake->block->virtual, 0, bo->size); + + bo_fake->dirty = 0; + } + + bo_fake->block->fenced = 0; + bo_fake->block->on_hardware = 1; + move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); + + bo_fake->validated = GL_TRUE; + bufmgr_fake->need_fence = 1; + + return 0; +} + +static dri_fence * +dri_fake_fence_validated(dri_bufmgr *bufmgr, const char *name, + GLboolean flushed) +{ + dri_fence_fake *fence_fake; + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + unsigned int cookie; + + fence_fake = malloc(sizeof(*fence_fake)); + if (!fence_fake) + return NULL; + + fence_fake->refcount = 1; + fence_fake->name = name; + fence_fake->flushed = flushed; + fence_fake->fence.bufmgr = bufmgr; + + cookie = _fence_emit_internal(bufmgr_fake); + fence_fake->fence_cookie = cookie; + fence_blocks(bufmgr_fake, cookie); + + DBG("drm_fence_validated: 0x%08x cookie\n", fence_fake->fence_cookie); + + return &fence_fake->fence; +} + +static void +dri_fake_fence_reference(dri_fence *fence) +{ + dri_fence_fake *fence_fake = (dri_fence_fake *)fence; + + ++fence_fake->refcount; +} + +static void +dri_fake_fence_unreference(dri_fence *fence) +{ + dri_fence_fake *fence_fake = (dri_fence_fake *)fence; + + if (!fence) + return; + + if (--fence_fake->refcount == 0) { + free(fence); + return; + } +} + +static void +dri_fake_fence_wait(dri_fence *fence) +{ + dri_fence_fake *fence_fake = (dri_fence_fake *)fence; + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)fence->bufmgr; + + DBG("drm_fence_wait: 0x%08x cookie\n", fence_fake->fence_cookie); + + _fence_wait_internal(bufmgr_fake, fence_fake->fence_cookie); +} + +static void +dri_fake_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + mmDestroy(bufmgr_fake->heap); + free(bufmgr); +} + +static int +dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; + struct fake_buffer_reloc *r; + dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; + dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; + int i; + + assert(reloc_buf); + assert(target_buf); + + assert(target_fake->is_static || target_fake->size_accounted); + + if (reloc_fake->relocs == NULL) { + reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * + MAX_RELOCS); + } + + r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; + + assert(reloc_fake->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(target_buf); + + r->target_buf = target_buf; + r->offset = offset; + r->last_target_offset = target_buf->offset; + r->delta = delta; + r->validate_flags = flags; + + if (bufmgr_fake->debug) { + /* Check that a conflicting relocation hasn't already been emitted. */ + for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { + struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; + + assert(r->offset != r2->offset); + } + } + + return 0; +} + +/** + * Incorporates the validation flags associated with each relocation into + * the combined validation flags for the buffer on this batchbuffer submission. + */ +static void +dri_fake_calculate_validate_flags(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + /* Do the same for the tree of buffers we depend on */ + dri_fake_calculate_validate_flags(r->target_buf); + + if (target_fake->validate_flags == 0) { + target_fake->validate_flags = r->validate_flags; + } else { + /* Mask the memory location to the intersection of all the memory + * locations the buffer is being validated to. + */ + target_fake->validate_flags = + (target_fake->validate_flags & ~DRM_BO_MASK_MEM) | + (r->validate_flags & target_fake->validate_flags & + DRM_BO_MASK_MEM); + /* All the other flags just accumulate. */ + target_fake->validate_flags |= r->validate_flags & ~DRM_BO_MASK_MEM; + } + } +} + + +static int +dri_fake_reloc_and_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i, ret; + + assert(bo_fake->map_count == 0); + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + uint32_t reloc_data; + + /* Validate the target buffer if that hasn't been done. */ + if (!target_fake->validated) { + ret = dri_fake_reloc_and_validate_buffer(r->target_buf); + if (ret != 0) { + if (bo->virtual != NULL) + dri_bo_unmap(bo); + return ret; + } + } + + /* Calculate the value of the relocation entry. */ + if (r->target_buf->offset != r->last_target_offset) { + reloc_data = r->target_buf->offset + r->delta; + + if (bo->virtual == NULL) + dri_bo_map(bo, GL_TRUE); + + *(uint32_t *)(bo->virtual + r->offset) = reloc_data; + + r->last_target_offset = r->target_buf->offset; + } + } + + if (bo->virtual != NULL) + dri_bo_unmap(bo); + + if (bo_fake->validate_flags & DRM_BO_FLAG_WRITE) { + if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo_fake->card_dirty = 1; + } + bufmgr_fake->performed_rendering = GL_TRUE; + } + + return dri_fake_bo_validate(bo, bo_fake->validate_flags); +} + +static void * +dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; + int ret; + int retry_count = 0; + + bufmgr_fake->performed_rendering = GL_FALSE; + + dri_fake_calculate_validate_flags(batch_buf); + + batch_fake->validate_flags = DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ; + + /* we've ran out of RAM so blow the whole lot away and retry */ + restart: + ret = dri_fake_reloc_and_validate_buffer(batch_buf); + if (bufmgr_fake->fail == 1) { + if (retry_count == 0) { + retry_count++; + dri_fake_kick_all(bufmgr_fake); + bufmgr_fake->fail = 0; + goto restart; + } else /* dump out the memory here */ + mmDumpMemInfo(bufmgr_fake->heap); + } + + assert(ret == 0); + + *count_p = 0; /* junk */ + + bufmgr_fake->current_total_size = 0; + return NULL; +} + +static void +dri_bo_fake_post_submit(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + if (target_fake->validated) + dri_bo_fake_post_submit(r->target_buf); + + DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", + bo_fake->name, (uint32_t)bo->offset, r->offset, + target_fake->name, (uint32_t)r->target_buf->offset, r->delta); + } + + assert(bo_fake->map_count == 0); + bo_fake->validated = GL_FALSE; + bo_fake->validate_flags = 0; +} + + +static void +dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_fence *fo; + + fo = dri_fake_fence_validated(batch_buf->bufmgr, "Batch fence", GL_TRUE); + + if (bufmgr_fake->performed_rendering) { + dri_fence_unreference(*last_fence); + *last_fence = fo; + } else { + dri_fence_unreference(fo); + } + + dri_bo_fake_post_submit(batch_buf); +} + +static int +dri_fake_check_aperture_space(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + GLuint sz; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + if (bo_fake->size_accounted || bo_fake->is_static) + return 0; + + if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { + DBG("check_space: %s bo %d %d overflowed bufmgr size %d\n", bo_fake->name, bo_fake->id, sz, bufmgr_fake->size); + return -1; + } + + bufmgr_fake->current_total_size += sz; + bo_fake->size_accounted = 1; + DBG("drm_check_space: buf %d, %s %d %d\n", bo_fake->id, bo_fake->name, bo->size, bufmgr_fake->current_total_size); + return 0; +} + +dri_bufmgr * +dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, unsigned int cookie), + void *driver_priv) +{ + dri_bufmgr_fake *bufmgr_fake; + + bufmgr_fake = calloc(1, sizeof(*bufmgr_fake)); + + /* Initialize allocator */ + make_empty_list(&bufmgr_fake->fenced); + make_empty_list(&bufmgr_fake->on_hardware); + make_empty_list(&bufmgr_fake->lru); + + bufmgr_fake->low_offset = low_offset; + bufmgr_fake->virtual = low_virtual; + bufmgr_fake->size = size; + bufmgr_fake->heap = mmInit(low_offset, size); + + /* Hook in methods */ + bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc; + bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static; + bufmgr_fake->bufmgr.bo_reference = dri_fake_bo_reference; + bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; + bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; + bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; + bufmgr_fake->bufmgr.fence_wait = dri_fake_fence_wait; + bufmgr_fake->bufmgr.fence_reference = dri_fake_fence_reference; + bufmgr_fake->bufmgr.fence_unreference = dri_fake_fence_unreference; + bufmgr_fake->bufmgr.destroy = dri_fake_destroy; + bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; + bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; + bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; + bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; + bufmgr_fake->bufmgr.debug = GL_FALSE; + + bufmgr_fake->fence_emit = fence_emit; + bufmgr_fake->fence_wait = fence_wait; + bufmgr_fake->driver_priv = driver_priv; + + return &bufmgr_fake->bufmgr; +} + diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index a1e56eb148..53f5f846a0 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -48,34 +48,36 @@ #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() #endif + #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) /** ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5 **/ -#ifndef GET_VALUE #ifndef GET_PTR #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) #endif -#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) -#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) -#endif /* GET_VALUE */ - #define INIT_MONO_PIXEL(p, color) \ p = PACK_COLOR_565( color[0], color[1], color[2] ) #define WRITE_RGBA( _x, _y, r, g, b, a ) \ - PUT_VALUE(_x, _y, ((((int)r & 0xf8) << 8) | \ - (((int)g & 0xfc) << 3) | \ - (((int)b & 0xf8) >> 3))) \ + do { \ + GLshort * _p = (GLshort *) GET_PTR(_x, _y); \ + _p[0] = ((((int)r & 0xf8) << 8) | (((int)g & 0xfc) << 3) | \ + (((int)b & 0xf8) >> 3)); \ + } while(0) -#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) +#define WRITE_PIXEL( _x, _y, p ) \ + do { \ + GLushort * _p = (GLushort *) GET_PTR(_x, _y); \ + _p[0] = p; \ + } while(0) #define READ_RGBA( rgba, _x, _y ) \ do { \ - GLushort p = GET_VALUE(_x, _y); \ + GLushort p = *(volatile GLshort *) GET_PTR(_x, _y); \ rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \ rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \ rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \ @@ -88,30 +90,29 @@ ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV **/ -#ifndef GET_VALUE #ifndef GET_PTR #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch) #endif -#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y)) -#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v) -#endif /* GET_VALUE */ - # define INIT_MONO_PIXEL(p, color) \ p = PACK_COLOR_8888(color[3], color[0], color[1], color[2]) # define WRITE_RGBA(_x, _y, r, g, b, a) \ - PUT_VALUE(_x, _y, ((r << 16) | \ - (g << 8) | \ - (b << 0) | \ - (a << 24))) + do { \ + GLuint * _p = (GLuint *) GET_PTR(_x, _y); \ + _p[0] = ((r << 16) | (g << 8) | (b << 0) | (a << 24)); \ + } while(0) -#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) +#define WRITE_PIXEL(_x, _y, p) \ + do { \ + GLuint * _p = (GLuint *) GET_PTR(_x, _y); \ + _p[0] = p; \ + } while(0) # if defined( USE_X86_ASM ) # define READ_RGBA(rgba, _x, _y) \ do { \ - GLuint p = GET_VALUE(_x, _y); \ + GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ __asm__ __volatile__( "bswap %0; rorl $8, %0" \ : "=r" (p) : "0" (p) ); \ ((GLuint *)rgba)[0] = p; \ @@ -122,14 +123,14 @@ */ # define READ_RGBA( rgba, _x, _y ) \ do { \ - GLuint p = GET_VALUE(_x, _y); \ + GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ GLuint t = p; \ *((uint32_t *) rgba) = (t >> 24) | (p << 8); \ } while (0) # else # define READ_RGBA( rgba, _x, _y ) \ do { \ - GLuint p = GET_VALUE(_x, _y); \ + GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ rgba[0] = (p >> 16) & 0xff; \ rgba[1] = (p >> 8) & 0xff; \ rgba[2] = (p >> 0) & 0xff; \ @@ -388,8 +389,7 @@ static void TAG(ReadRGBASpan)( GLcontext *ctx, } -#if defined(GET_PTR) && \ - defined(USE_MMX_ASM) && \ +#if defined(USE_MMX_ASM) && \ (((SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \ ((SPANTMP_PIXEL_FMT == GL_RGB) && \ @@ -440,8 +440,7 @@ static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx, #endif -#if defined(GET_PTR) && \ - defined(USE_SSE_ASM) && \ +#if defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx, @@ -475,8 +474,7 @@ static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx, } #endif -#if defined(GET_PTR) && \ - defined(USE_SSE_ASM) && \ +#if defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx, @@ -569,7 +567,6 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) rb->PutMonoValues = TAG(WriteMonoRGBAPixels); rb->GetValues = TAG(ReadRGBAPixels); -#if defined(GET_PTR) #if defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) @@ -599,7 +596,6 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) } else #endif -#endif /* GET_PTR */ { if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" ); rb->GetRow = TAG(ReadRGBASpan); @@ -614,8 +610,6 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) #undef READ_RGBA #undef TAG #undef TAG2 -#undef GET_VALUE -#undef PUT_VALUE #undef GET_PTR #undef SPANTMP_PIXEL_FMT #undef SPANTMP_PIXEL_TYPE diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index fd77980ebf..bdd3b85260 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -30,8 +30,8 @@ DRIVER_SOURCES = \ intel_pixel.c \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ - intel_pixel_draw.c \ intel_pixel_read.c \ + intel_pixel_draw.c \ intel_buffers.c \ intel_blit.c \ i915_tex.c \ @@ -52,10 +52,12 @@ DRIVER_SOURCES = \ intel_state.c \ intel_tris.c \ intel_fbo.c \ - intel_depthstencil.c + intel_depthstencil.c \ + intel_bufmgr_ttm.c C_SOURCES = \ $(COMMON_SOURCES) \ + $(COMMON_BM_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = @@ -67,7 +69,6 @@ DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \ include ../Makefile.template intel_decode.o: ../intel/intel_decode.c - intel_tex_layout.o: ../intel/intel_tex_layout.c symlinks: diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index 116b52511f..16c8a8d24f 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -81,9 +81,6 @@ i830CreateContext(const __GLcontextModes * mesaVis, _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, intel_pipeline); - if (intel->no_rast) - FALLBACK(intel, INTEL_FALLBACK_USER, 1); - intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS; diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index d210c2d08e..41280bca7c 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -494,6 +494,10 @@ #define VFT1_TEX0_FMT(x) (x) #define VFT1_TEX0_MASK 3 #define VFT1_TEX1_SHIFT 2 +#define TEXCOORDFMT_2D 0 +#define TEXCOORDFMT_3D 1 +#define TEXCOORDFMT_4D 2 +#define TEXCOORDFMT_1D 3 /*New stuff picked up along the way */ @@ -631,4 +635,8 @@ #define ENABLE_TEX_STREAM_MAP_IDX (1<<3) #define TEX_STREAM_MAP_IDX(x) (x) + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) + #endif diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 40a50ff772..c5a85fe035 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -31,7 +31,6 @@ #include "i830_reg.h" #include "intel_batchbuffer.h" #include "intel_regions.h" -#include "intel_tris.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -420,12 +419,10 @@ i830_emit_state(struct intel_context *intel) { struct i830_context *i830 = i830_context(&intel->ctx); struct i830_hw_state *state = i830->current; - int i, count; + int i, ret, count; GLuint dirty; GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; - dri_bo *aper_array[3 + I830_TEX_UNITS]; - int aper_count; /* We don't hold the lock at this point, so want to make sure that * there won't be a buffer wrap between the state emits and the primitive @@ -438,28 +435,26 @@ i830_emit_state(struct intel_context *intel) * Set the space as LOOP_CLIPRECTS now, since that's what our primitives * will be emitted under. */ - intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, LOOP_CLIPRECTS); count = 0; again: - aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + ret = 0; if (dirty & I830_UPLOAD_BUFFERS) { - aper_array[aper_count++] = state->draw_region->buffer; - aper_array[aper_count++] = state->depth_region->buffer; + ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); + ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); } - + for (i = 0; i < I830_TEX_UNITS; i++) if (dirty & I830_UPLOAD_TEX(i)) { if (state->tex_buffer[i]) { - aper_array[aper_count++] = state->tex_buffer[i]; + ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); } } - if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { + if (ret) { if (count == 0) { count++; intel_batchbuffer_flush(intel->batch); @@ -495,14 +490,14 @@ i830_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, state->depth_region->draw_offset); } @@ -529,7 +524,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, state->tex_offset[i] | TM0S0_USE_FENCE); } else if (state == &i830->meta) { @@ -722,5 +717,4 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.note_unlock = i830_note_unlock; - i830->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 940b02513d..bd9f1d5f5d 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -138,9 +138,6 @@ i915CreateContext(const __GLcontextModes * mesaVis, _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, intel_pipeline); - if (intel->no_rast) - FALLBACK(intel, INTEL_FALLBACK_USER, 1); - ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 8891e11c6f..b5585e70e7 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -325,6 +325,118 @@ #define SCISSOR_RECT_0_YMAX(x) ((x)<<16) #define SCISSOR_RECT_0_XMAX(x) (x) +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + + /* Helper macros for blend factors */ #define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT) @@ -743,4 +855,10 @@ #define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) #define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + #endif diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 19f2206285..135bfaa265 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -39,7 +39,6 @@ #include "intel_batchbuffer.h" #include "intel_tex.h" #include "intel_regions.h" -#include "intel_tris.h" #include "i915_reg.h" #include "i915_context.h" @@ -297,9 +296,9 @@ i915_emit_state(struct intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); struct i915_hw_state *state = i915->current; - int i, count, aper_count; + int i; + int ret, count; GLuint dirty; - dri_bo *aper_array[3 + I915_TEX_UNITS]; GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; @@ -314,32 +313,28 @@ i915_emit_state(struct intel_context *intel) * Set the space as LOOP_CLIPRECTS now, since that's what our primitives * will be emitted under. */ - intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, LOOP_CLIPRECTS); count = 0; again: - aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + ret = 0; if (dirty & I915_UPLOAD_BUFFERS) { - aper_array[aper_count++] = state->draw_region->buffer; - if (state->depth_region) - aper_array[aper_count++] = state->depth_region->buffer; + ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); + if (state->depth_region) + ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); } if (dirty & I915_UPLOAD_TEX_ALL) { - for (i = 0; i < I915_TEX_UNITS; i++) { - if (dirty & I915_UPLOAD_TEX(i)) { - if (state->tex_buffer[i]) { - aper_array[aper_count++] = state->tex_buffer[i]; - } - } - } + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) { + if (state->tex_buffer[i]) { + ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); + } + } } - - if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { + if (ret) { if (count == 0) { count++; intel_batchbuffer_flush(intel->batch); @@ -382,14 +377,14 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, state->depth_region->draw_offset); } @@ -432,7 +427,7 @@ i915_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, state->tex_offset[i]); } else if (state == &i915->meta) { @@ -634,5 +629,4 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.flush_cmd = i915_flush_cmd; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.note_unlock = i915_note_unlock; - i915->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c new file mode 120000 index 0000000000..e9df5c6279 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_ttm.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index d009590a4b..2e31656e98 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -173,6 +173,7 @@ do_blit_readpixels(GLcontext * ctx, struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); GLuint dst_offset; GLuint rowLength; + dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -263,7 +264,7 @@ do_blit_readpixels(GLcontext * ctx, intelEmitCopyBlit(intel, src->cpp, - src->pitch, src->buffer, 0, src->tiling, + src->pitch, src->buffer, 0, src->tiled, rowLength, dst_buffer, dst_offset, GL_FALSE, rect.x1, rect.y1, @@ -272,9 +273,19 @@ do_blit_readpixels(GLcontext * ctx, rect.x2 - rect.x1, rect.y2 - rect.y1, GL_COPY); } + + intel_batchbuffer_flush(intel->batch); + fence = intel->batch->last_fence; + dri_fence_reference(fence); + } UNLOCK_HARDWARE(intel); + if (fence) { + dri_fence_wait(fence); + dri_fence_unreference(fence); + } + if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 838d450378..5e6500cfa1 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -67,7 +67,7 @@ #define HAVE_ELTS 0 -static uint32_t hw_prim[GL_POLYGON + 1] = { +static GLuint hw_prim[GL_POLYGON + 1] = { 0, PRIM3D_LINELIST, PRIM3D_LINESTRIP, @@ -114,7 +114,7 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); INTEL_FIREVERTICES(intel); intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]); - intel_set_prim(intel, hw_prim[prim]); + intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS); } @@ -126,11 +126,12 @@ do { \ #define FLUSH() INTEL_FIREVERTICES(intel) -#define GET_SUBSEQUENT_VB_MAX_VERTS() (INTEL_VB_SIZE / (intel->vertex_size * 4)) -#define GET_CURRENT_VB_MAX_VERTS() \ - ((INTEL_VB_SIZE - intel->prim.current_offset) / (intel->vertex_size * 4)) +#define GET_SUBSEQUENT_VB_MAX_VERTS() \ + ((intel->batch->size - 1500) / (intel->vertex_size*4)) +#define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS() -#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr) +#define ALLOC_VERTS( nr ) \ + intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size ) #define EMIT_VERTS( ctx, j, nr, buf ) \ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf ) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 5f4b852212..bbb4e0f3cd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -25,12 +25,6 @@ * **************************************************************************/ -/** @file intel_tris.c - * - * This file contains functions for managing the vertex buffer and emitting - * primitives into it. - */ - #include "glheader.h" #include "context.h" #include "macros.h" @@ -53,185 +47,111 @@ #include "intel_reg.h" #include "intel_span.h" #include "intel_tex.h" -#include "intel_chipset.h" -#include "i830_context.h" -#include "i830_reg.h" static void intelRenderPrimitive(GLcontext * ctx, GLenum prim); static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim); -/** Sets the primitive type for a primitive sequence, flushing as needed. */ -void intel_set_prim(struct intel_context *intel, uint32_t prim) +/* + */ +static void +intel_flush_inline_primitive(struct intel_context *intel) { - if (prim != intel->prim.primitive) { - INTEL_FIREVERTICES(intel); - intel->prim.primitive = prim; - } -} + GLuint used = intel->batch->ptr - intel->prim.start_ptr; -/** Returns mapped VB space for the given number of vertices */ -uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) -{ - uint32_t *addr; - - /* Check for space in the existing VB */ - if (intel->prim.vb_bo == NULL || - (intel->prim.current_offset + - count * intel->vertex_size * 4) > INTEL_VB_SIZE || - (intel->prim.count + count) >= (1 << 16)) { - /* Flush existing prim if any */ - INTEL_FIREVERTICES(intel); + assert(intel->prim.primitive != ~0); - intel_finish_vb(intel); +/* _mesa_printf("/\n"); */ - /* Start a new VB */ - if (intel->prim.vb == NULL) - intel->prim.vb = malloc(INTEL_VB_SIZE); - intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb", - INTEL_VB_SIZE, 4); - intel->prim.start_offset = 0; - intel->prim.current_offset = 0; - } + if (used < 8) + goto do_discard; + + *(int *) intel->prim.start_ptr = (_3DPRIMITIVE | + intel->prim.primitive | (used / 4 - 2)); - intel->prim.flush = intel_flush_prim; + goto finished; - addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset); - intel->prim.current_offset += intel->vertex_size * 4 * count; - intel->prim.count += count; + do_discard: + intel->batch->ptr -= used; - return addr; + finished: + intel->prim.primitive = ~0; + intel->prim.start_ptr = 0; + intel->prim.flush = 0; } -/** Dispatches the accumulated primitive to the batchbuffer. */ -void intel_flush_prim(struct intel_context *intel) + +/* Emit a primitive referencing vertices in a vertex buffer. + */ +void +intelStartInlinePrimitive(struct intel_context *intel, + GLuint prim, GLuint batch_flags) { BATCH_LOCALS; - dri_bo *aper_array[2]; - dri_bo *vb_bo; - - /* Must be called after an intel_start_prim. */ - assert(intel->prim.primitive != ~0); - - if (intel->prim.count == 0) - return; - - /* Keep a reference on the BO as it may get finished as we start the - * batch emit. - */ - vb_bo = intel->prim.vb_bo; - dri_bo_reference(vb_bo); intel_wait_flips(intel); intel->vtbl.emit_state(intel); - aper_array[0] = intel->batch->buf; - aper_array[1] = vb_bo; - if (dri_bufmgr_check_aperture_space(aper_array, 2)) { - intel_batchbuffer_flush(intel->batch); - intel->vtbl.emit_state(intel); - } - - /* Ensure that we don't start a new batch for the following emit, which - * depends on the state just emitted. emit_state should be making sure we - * have the space for this. - */ intel->no_batch_wrap = GL_TRUE; - /* Check that we actually emitted the state into this batch, using the - * UPLOAD_CTX bit as the signal. +/* _mesa_printf("%s *", __progname); */ + + /* Emit a slot which will be filled with the inline primitive + * command later. */ + BEGIN_BATCH(2, batch_flags); + OUT_BATCH(0); + assert((intel->batch->dirty_state & (1<<1)) == 0); -#if 0 - printf("emitting %d..%d=%d vertices size %d\n", intel->prim.start_offset, - intel->prim.current_offset, intel->prim.count, - intel->vertex_size * 4); -#endif + intel->prim.start_ptr = intel->batch->ptr; + intel->prim.primitive = prim; + intel->prim.flush = intel_flush_inline_primitive; - if (IS_9XX(intel->intelScreen->deviceID)) { - BEGIN_BATCH(5, LOOP_CLIPRECTS); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | I1_LOAD_S(1) | 1); - assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK) == 0); - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, - intel->prim.start_offset); - OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | - (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); - - OUT_BATCH(_3DPRIMITIVE | - PRIM_INDIRECT | - PRIM_INDIRECT_SEQUENTIAL | - intel->prim.primitive | - intel->prim.count); - OUT_BATCH(0); /* Beginning vertex index */ - ADVANCE_BATCH(); - } else { - struct i830_context *i830 = i830_context(&intel->ctx); - - BEGIN_BATCH(5, LOOP_CLIPRECTS); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | I1_LOAD_S(2) | 1); - /* S0 */ - assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK_830) == 0); - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, - intel->prim.start_offset | - (intel->vertex_size << S0_VB_PITCH_SHIFT_830) | - S0_VB_ENABLE_830); - /* S1 - * This is somewhat unfortunate -- VB width is tied up with - * vertex format data that we've already uploaded through - * _3DSTATE_VFT[01]_CMD. We may want to replace emits of VFT state with - * STATE_IMMEDIATE_1 like this to avoid duplication. - */ - OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >> - VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 | - (i830->state.Ctx[I830_CTXREG_VF2] << 16) | - intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830); - - OUT_BATCH(_3DPRIMITIVE | - PRIM_INDIRECT | - PRIM_INDIRECT_SEQUENTIAL | - intel->prim.primitive | - intel->prim.count); - OUT_BATCH(0); /* Beginning vertex index */ - ADVANCE_BATCH(); - } + OUT_BATCH(0); + ADVANCE_BATCH(); intel->no_batch_wrap = GL_FALSE; - intel->prim.flush = NULL; - intel->prim.start_offset = intel->prim.current_offset; - if (!IS_9XX(intel->intelScreen->deviceID)) - intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); - intel->prim.count = 0; +/* _mesa_printf(">"); */ +} + + +void +intelWrapInlinePrimitive(struct intel_context *intel) +{ + GLuint prim = intel->prim.primitive; + enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode; - dri_bo_unreference(vb_bo); + intel_flush_inline_primitive(intel); + intel_batchbuffer_flush(intel->batch); + intelStartInlinePrimitive(intel, prim, cliprect_mode); /* ??? */ } -/** - * Uploads the locally-accumulated VB into the buffer object. - * - * This avoids us thrashing the cachelines in and out as the buffer gets - * filled, dispatched, then reused as the hardware completes rendering from it, - * and also lets us clflush less if we dispatch with a partially-filled VB. - * - * This is called normally from get_space when we're finishing a BO, but also - * at batch flush time so that we don't try accessing the contents of a - * just-dispatched buffer. - */ -void intel_finish_vb(struct intel_context *intel) +GLuint * +intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords) { - if (intel->prim.vb_bo == NULL) - return; + GLuint sz = dwords * sizeof(GLuint); + GLuint *ptr; + + assert(intel->prim.flush == intel_flush_inline_primitive); + + if (intel_batchbuffer_space(intel->batch) < sz) + intelWrapInlinePrimitive(intel); + +/* _mesa_printf("."); */ + + intel->vtbl.assert_not_dirty(intel); - dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset, - intel->prim.vb); - dri_bo_unreference(intel->prim.vb_bo); - intel->prim.vb_bo = NULL; + ptr = (GLuint *) intel->batch->ptr; + intel->batch->ptr += sz; + + return ptr; } + + /*********************************************************************** * Emit primitives as inline vertices * ***********************************************************************/ @@ -262,7 +182,7 @@ intel_draw_quad(struct intel_context *intel, intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intel_get_prim_space(intel, 6); + GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -290,7 +210,7 @@ intel_draw_triangle(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intel_get_prim_space(intel, 3); + GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -304,7 +224,7 @@ intel_draw_line(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intel_get_prim_space(intel, 2); + GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -316,7 +236,7 @@ static void intel_draw_point(struct intel_context *intel, intelVertexPtr v0) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intel_get_prim_space(intel, 1); + GLuint *vb = intelExtendInlinePrimitive(intel, vertsize); int j; /* Adjust for sub pixel position -- still required for conform. */ @@ -825,7 +745,7 @@ intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n) { struct intel_context *intel = intel_context(ctx); const GLuint vertsize = intel->vertex_size; - GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3); + GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize); GLubyte *vertptr = (GLubyte *) intel->verts; const GLuint *start = (const GLuint *) V(elts[0]); int i, j; @@ -1030,7 +950,7 @@ intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim) if (hwprim != intel->prim.primitive) { INTEL_FIREVERTICES(intel); - intel_set_prim(intel, hwprim); + intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS); } } @@ -1163,18 +1083,15 @@ intel_meta_draw_poly(struct intel_context *intel, union fi *vb; GLint i; GLboolean was_locked = intel->locked; - unsigned int saved_vertex_size = intel->vertex_size; if (!was_locked) LOCK_HARDWARE(intel); - intel->vertex_size = 6; - /* All 3d primitives should be emitted with LOOP_CLIPRECTS, * otherwise the drawing origin (DR4) might not be set correctly. */ - intel_set_prim(intel, PRIM3D_TRIFAN); - vb = (union fi *) intel_get_prim_space(intel, n); + intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS); + vb = (union fi *) intelExtendInlinePrimitive(intel, n * 6); for (i = 0; i < n; i++) { vb[0].f = xy[i][0]; @@ -1188,8 +1105,6 @@ intel_meta_draw_poly(struct intel_context *intel, INTEL_FIREVERTICES(intel); - intel->vertex_size = saved_vertex_size; - if (!was_locked) UNLOCK_HARDWARE(intel); } diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h index 0e08986221..021e5c6450 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.h +++ b/src/mesa/drivers/dri/i915/intel_tris.h @@ -30,9 +30,7 @@ #include "mtypes.h" -#define INTEL_VB_SIZE (32 * 1024) -/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */ -#define INTEL_PRIM_EMIT_SIZE (5 * 4) + #define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE | \ _DD_NEW_TRI_UNFILLED | \ @@ -46,9 +44,11 @@ extern void intelInitTriFuncs(GLcontext * ctx); extern void intelChooseRenderState(GLcontext * ctx); -void intel_set_prim(struct intel_context *intel, uint32_t prim); -GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count); -void intel_flush_prim(struct intel_context *intel); -void intel_finish_vb(struct intel_context *intel); +extern void intelStartInlinePrimitive(struct intel_context *intel, + GLuint prim, GLuint flags); +extern void intelWrapInlinePrimitive(struct intel_context *intel); + +GLuint *intelExtendInlinePrimitive(struct intel_context *intel, + GLuint dwords); #endif diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 8fa205e79f..c561b744d1 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,6 +9,7 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ + intel_bufmgr_ttm.c \ intel_context.c \ intel_decode.c \ intel_depthstencil.c \ @@ -84,6 +85,7 @@ DRIVER_SOURCES = \ C_SOURCES = \ $(COMMON_SOURCES) \ + $(COMMON_BM_SOURCES) \ $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index d662cf7521..9d8984f05c 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -37,7 +37,7 @@ #include "macros.h" #include "enums.h" -static void prepare_cc_vp( struct brw_context *brw ) +static int upload_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; @@ -48,6 +48,7 @@ static void prepare_cc_vp( struct brw_context *brw ) dri_bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + return dri_bufmgr_check_aperture_space(brw->cc.vp_bo); } const struct brw_tracked_state brw_cc_vp = { @@ -56,7 +57,7 @@ const struct brw_tracked_state brw_cc_vp = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_cc_vp + .prepare = upload_cc_vp }; struct brw_cc_unit_key { @@ -255,17 +256,16 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) NULL, NULL); /* Emit CC viewport relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); return bo; } -static void prepare_cc_unit( struct brw_context *brw ) +static int prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; @@ -279,6 +279,7 @@ static void prepare_cc_unit( struct brw_context *brw ) if (brw->cc.state_bo == NULL) brw->cc.state_bo = cc_unit_create_from_key(brw, &key); + return dri_bufmgr_check_aperture_space(brw->cc.state_bo); } const struct brw_tracked_state brw_cc_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 22981fd2d9..540108e5f4 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -131,7 +131,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_clip_prog(struct brw_context *brw) +static int upload_clip_prog( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; @@ -242,6 +242,8 @@ static void upload_clip_prog(struct brw_context *brw) &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); + + return dri_bufmgr_check_aperture_space(brw->clip.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index ae904c6253..2d0b24c5ca 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -119,19 +119,19 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, - clip.thread0.grf_reg_count << 1, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); return bo; } -static void upload_clip_unit( struct brw_context *brw ) +static int upload_clip_unit( struct brw_context *brw ) { struct brw_clip_unit_key key; + int ret = 0; clip_unit_populate_key(brw, &key); @@ -143,6 +143,9 @@ static void upload_clip_unit( struct brw_context *brw ) if (brw->clip.state_bo == NULL) { brw->clip.state_bo = clip_unit_create_from_key(brw, &key); } + + ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo); + return ret; } const struct brw_tracked_state brw_clip_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index b4e2619929..1c7ad5c9c8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -134,6 +134,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = (1<<11); + ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 330d5714da..32e05542e0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -135,8 +135,6 @@ struct brw_context; #define BRW_NEW_METAOPS 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_LOCK 0x4000 -#define BRW_NEW_INDICES 0x8000 -#define BRW_NEW_VERTICES 0x10000 /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. @@ -334,7 +332,7 @@ struct brw_state_pointers { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*prepare)( struct brw_context *brw ); + int (*prepare)( struct brw_context *brw ); void (*emit)( struct brw_context *brw ); }; @@ -452,22 +450,9 @@ struct brw_context * for changes to this state: */ struct brw_vertex_info info; - unsigned int min_index, max_index; } vb; struct { - /** - * Index buffer for this draw_prims call. - * - * Updates are signaled by BRW_NEW_INDICES. - */ - const struct _mesa_index_buffer *ib; - - dri_bo *bo; - unsigned int offset; - } ib; - - struct { /* Will be allocated on demand if needed. */ struct brw_state_pointers attribs; @@ -656,7 +641,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_state.c */ -void brw_validate_state( struct brw_context *brw ); +int brw_validate_state( struct brw_context *brw ); void brw_init_state( struct brw_context *brw ); void brw_destroy_state( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 0a3600193b..5ff4e2964e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -46,7 +46,7 @@ /* Partition the CURBE between the various users of constant values: */ -static void calculate_curbe_offsets( struct brw_context *brw ) +static int calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -117,6 +117,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } + return 0; } @@ -155,7 +156,19 @@ void brw_upload_constant_buffer_state(struct brw_context *brw) assert(brw->urb.nr_cs_entries); BRW_CACHED_BATCH_STRUCT(brw, &cbs); -} +} + +#if 0 +const struct brw_tracked_state brw_constant_buffer_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE, + .cache = 0 + }, + .update = brw_upload_constant_buffer_state +}; +#endif + static GLfloat fixed_plane[6][4] = { { 0, 0, -1, 1 }, @@ -170,7 +183,7 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static void prepare_constant_buffer(struct brw_context *brw) +static int prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; @@ -194,8 +207,8 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - - return; + + return 0; } buf = (GLfloat *)malloc(bufsz); @@ -293,7 +306,10 @@ static void prepare_constant_buffer(struct brw_context *brw) * They're generally around 64b. */ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", - 4096, 1 << 6); + 4096, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); brw->curbe.curbe_next_offset = 0; } @@ -320,6 +336,9 @@ static void prepare_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ + + /* check aperture space for this bo */ + return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo); } @@ -327,13 +346,6 @@ static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - dri_bo *aper_array[] = { - brw->intel.batch->buf, - brw->curbe.curbe_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { @@ -341,8 +353,7 @@ static void emit_constant_buffer(struct brw_context *brw) OUT_BATCH(0); } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); - OUT_RELOC(brw->curbe.curbe_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0593e8d5f5..92c058ade8 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -33,6 +33,69 @@ #ifndef BRW_DEFINES_H #define BRW_DEFINES_H +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + /* 3D state: */ #define _3DOP_3DSTATE_PIPELINED 0x0 @@ -56,6 +119,7 @@ #define _3DSTATE_LINE_STIPPLE 0x08 #define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 #define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 #define PIPE_CONTROL_NOWRITE 0x00 #define PIPE_CONTROL_WRITEIMMEDIATE 0x01 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d43b52c2c7..f90c5f7b08 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -83,8 +83,9 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) +static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush) { + int ret; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); @@ -105,7 +106,9 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } - brw_validate_state(brw); + ret = brw_validate_state(brw); + if (ret) + *need_flush = GL_TRUE; } return hw_prim[prim]; @@ -128,6 +131,7 @@ static void brw_emit_prim( struct brw_context *brw, { struct brw_3d_primitive prim_packet; + GLboolean need_flush = GL_FALSE; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -136,7 +140,7 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; prim_packet.header.pad = 0; - prim_packet.header.topology = brw_set_prim(brw, prim->mode); + prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush); prim_packet.header.indexed = prim->indexed; prim_packet.verts_per_instance = trim(prim->mode, prim->count); @@ -145,13 +149,12 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.start_instance_location = 0; prim_packet.base_vert_location = 0; - /* Can't wrap here, since we rely on the validated state. */ - brw->no_batch_wrap = GL_TRUE; if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } - brw->no_batch_wrap = GL_FALSE; + + assert(need_flush == GL_FALSE); } static void brw_merge_inputs( struct brw_context *brw, @@ -255,6 +258,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; GLuint i; + GLuint ib_offset; + dri_bo *ib_bo; + GLboolean force_flush = GL_FALSE; + int ret; if (ctx->NewState) _mesa_update_state( ctx ); @@ -264,13 +271,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); - - brw->ib.ib = ib; - brw->state.dirty.brw |= BRW_NEW_INDICES; - - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->state.dirty.brw |= BRW_NEW_VERTICES; + /* Have to validate state quite late. Will rebuild tnl_program, * which depends on varying information. * @@ -293,18 +294,29 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ + flush: + if (force_flush) + brw->no_batch_wrap = GL_FALSE; + if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ - || intel->batch->cliprect_mode != LOOP_CLIPRECTS) + || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE)) intel_batchbuffer_flush(intel->batch); + force_flush = GL_FALSE; + brw->no_batch_wrap = GL_TRUE; + /* Set the first primitive early, ahead of validate_state: */ - brw_set_prim(brw, prim[0].mode); + brw_set_prim(brw, prim[0].mode, &force_flush); /* XXX: Need to separate validate and upload of state. */ - brw_validate_state( brw ); + ret = brw_validate_state( brw ); + if (ret) { + force_flush = GL_TRUE; + goto flush; + } /* Various fallback checks: */ @@ -314,6 +326,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; + /* need to account for index buffer and vertex buffer */ + if (ib) { + ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset); + if (ret) { + force_flush = GL_TRUE; + goto flush; + } + } + + ret = brw_prepare_vertices( brw, min_index, max_index); + if (ret < 0) + goto out; + + if (ret > 0) { + force_flush = GL_TRUE; + goto flush; + } + + /* Upload index, vertex data: + */ + if (ib) + brw_emit_indices( brw, ib, ib_bo, ib_offset); + + brw_emit_vertices( brw, min_index, max_index); + for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } @@ -322,6 +359,9 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, } out: + + brw->no_batch_wrap = GL_FALSE; + UNLOCK_HARDWARE(intel); if (!retval) diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 2a3e0c1c5b..b3547400d4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -51,4 +51,27 @@ void brw_draw_destroy( struct brw_context *brw ); void brw_init_current_values(GLcontext *ctx, struct gl_client_array *arrays); + +/* brw_draw_upload.c + */ +int brw_prepare_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo **bo_return, + GLuint *offset_return); + +void brw_emit_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo *bo, + GLuint offset); + +int brw_prepare_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ); + +void brw_emit_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ); + + + #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 0122d18f82..7946ffd9d9 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -228,7 +228,10 @@ static void wrap_buffers( struct brw_context *brw, if (brw->vb.upload.bo != NULL) dri_bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1); + size, 1, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); /* Set the internal VBO\ to no-backing-store. We only use them as a * temporary within a brw_try_draw_prims while the lock is held. @@ -301,7 +304,9 @@ copy_array_to_vbo_array( struct brw_context *brw, } } -static void brw_prepare_vertices(struct brw_context *brw) +int brw_prepare_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -309,8 +314,7 @@ static void brw_prepare_vertices(struct brw_context *brw) GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; - unsigned int min_index = brw->vb.min_index; - unsigned int max_index = brw->vb.max_index; + int ret = 0; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled = 0; @@ -338,10 +342,8 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; - return; - } + if (nr_enabled >= BRW_VEP_MAX) + return -1; for (i = 0; i < nr_enabled; i++) { struct brw_vertex_element *input = enabled[i]; @@ -359,6 +361,8 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + + ret |= dri_bufmgr_check_aperture_space(input->bo); } else { /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. @@ -367,7 +371,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) - return; + return -1; interleave = input->glarray->StrideB; ptr = input->glarray->Ptr; @@ -399,6 +403,7 @@ static void brw_prepare_vertices(struct brw_context *brw) */ copy_array_to_vbo_array(brw, upload[0], interleave); + ret |= dri_bufmgr_check_aperture_space(upload[0]->bo); for (i = 1; i < nr_uploads; i++) { /* Then, just point upload[i] at upload[0]'s buffer. */ upload[i]->stride = interleave; @@ -412,11 +417,23 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + if (upload[i]->bo) { + ret |= dri_bufmgr_check_aperture_space(upload[i]->bo); + } } } + + + if (ret) + return 1; + + + return 0; } -static void brw_emit_vertices(struct brw_context *brw) +void brw_emit_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -452,9 +469,9 @@ static void brw_emit_vertices(struct brw_context *brw) BRW_VB0_ACCESS_VERTEXDATA | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, input->offset); - OUT_BATCH(brw->vb.max_index); + OUT_BATCH(max_index); OUT_BATCH(0); /* Instance data step rate */ /* Unreference the buffer so it can get freed, now that we won't @@ -498,31 +515,18 @@ static void brw_emit_vertices(struct brw_context *brw) ADVANCE_BATCH(); } -const struct brw_tracked_state brw_vertices = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, - .cache = 0, - }, - .prepare = brw_prepare_vertices, - .emit = brw_emit_vertices, -}; - -static void brw_prepare_indices(struct brw_context *brw) +int brw_prepare_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo **bo_return, + GLuint *offset_return) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; + GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; dri_bo *bo; - struct gl_buffer_object *bufferobj; - GLuint offset; - - if (index_buffer == NULL) - return; - - ib_size = get_size(index_buffer->type) * index_buffer->count; - bufferobj = index_buffer->obj;; + struct gl_buffer_object *bufferobj = index_buffer->obj; + GLuint offset = (GLuint)index_buffer->ptr; + int ret; /* Turn into a proper VBO: */ @@ -536,8 +540,6 @@ static void brw_prepare_indices(struct brw_context *brw) */ dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); } else { - offset = (GLuint)index_buffer->ptr; - /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. */ @@ -560,22 +562,19 @@ static void brw_prepare_indices(struct brw_context *brw) } } - dri_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; - brw->ib.offset = offset; + *bo_return = bo; + *offset_return = offset; + ret = dri_bufmgr_check_aperture_space(bo); + return ret; } -static void brw_emit_indices(struct brw_context *brw) +void brw_emit_indices(struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo *bo, + GLuint offset) { struct intel_context *intel = &brw->intel; - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; - - if (index_buffer == NULL) - return; - - ib_size = get_size(index_buffer->type) * index_buffer->count; - + GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; /* Emit the indexbuffer packet: */ { @@ -591,23 +590,13 @@ static void brw_emit_indices(struct brw_context *brw) BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset); - OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + ib_size); + OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset); + OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); + + dri_bo_unreference(bo); } } -const struct brw_tracked_state brw_indices = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, - .cache = 0, - }, - .prepare = brw_prepare_indices, - .emit = brw_emit_indices, -}; diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 693f68f32a..8a8fb50cb9 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -95,9 +95,10 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_FALSE; } -static void check_fallback(struct brw_context *brw) +static int check_fallback(struct brw_context *brw) { brw->intel.Fallback = do_check_fallback(brw); + return 0; } const struct brw_tracked_state brw_check_fallback = { diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 2daef0093b..9419315c7a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -162,9 +162,10 @@ static void populate_key( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void prepare_gs_prog(struct brw_context *brw) +static int prepare_gs_prog( struct brw_context *brw ) { struct brw_gs_prog_key key; + int ret = 0; /* Populate the key: */ populate_key(brw, &key); @@ -182,7 +183,11 @@ static void prepare_gs_prog(struct brw_context *brw) &brw->gs.prog_data); if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); + + ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo); } + + return ret; } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index ff2e3ab059..f1f9e018f1 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -106,17 +106,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - gs.thread0.grf_reg_count << 1, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); } return bo; } -static void prepare_gs_unit(struct brw_context *brw) +static int prepare_gs_unit( struct brw_context *brw ) { struct brw_gs_unit_key key; @@ -130,6 +130,7 @@ static void prepare_gs_unit(struct brw_context *brw) if (brw->gs.state_bo == NULL) { brw->gs.state_bo = gs_unit_create_from_key(brw, &key); } + return dri_bufmgr_check_aperture_space(brw->gs.state_bo); } const struct brw_tracked_state brw_gs_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 487c638ce2..62df2590f3 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -81,13 +81,6 @@ const struct brw_tracked_state brw_blend_constant_color = { static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->wm.bind_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -95,9 +88,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); ADVANCE_BATCH(); } @@ -123,43 +114,49 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); if (brw->gs.prog_active) - OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); else OUT_BATCH(0); if (!brw->metaops.active) - OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); else OUT_BATCH(0); - OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; } +#if 0 +/* Combined into brw_psp_urb_cbs */ +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .emit = upload_pipelined_state_pointers +}; +#endif + static void upload_psp_urb_cbs(struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->vs.state_bo, - brw->gs.state_bo, - brw->clip.state_bo, - brw->wm.state_bo, - brw->cc.state_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); - upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); brw_upload_constant_buffer_state(brw); } + const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, @@ -175,6 +172,22 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .emit = upload_psp_urb_cbs, }; +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ + +static int prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + + if (!region || !region->buffer) + return 0; + return dri_bufmgr_check_aperture_space(region->buffer); +} + static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -196,10 +209,6 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { unsigned int format; - dri_bo *aper_array[] = { - intel->batch->buf, - region->buffer - }; switch (region->cpp) { case 2: @@ -216,19 +225,15 @@ static void emit_depthbuffer(struct brw_context *brw) return; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); - BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | - ((region->tiling != I915_TILING_NONE) << 27) | + (region->tiled << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((region->pitch - 1) << 6) | ((region->height - 1) << 19)); @@ -247,6 +252,7 @@ const struct brw_tracked_state brw_depthbuffer = { .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, .cache = 0, }, + .prepare = prepare_depthbuffer, .emit = emit_depthbuffer, }; @@ -374,6 +380,40 @@ const struct brw_tracked_state brw_line_stipple = { }; + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = 0 + }, + .emit = upload_pipe_control +}; + + /*********************************************************************** * Misc invarient state packets */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 5bb9e11310..0b61748321 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -125,7 +125,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_sf_prog(struct brw_context *brw) +static int upload_sf_prog( struct brw_context *brw ) { struct brw_sf_prog_key key; @@ -174,6 +174,7 @@ static void upload_sf_prog(struct brw_context *brw) &brw->sf.prog_data); if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); + return dri_bufmgr_check_aperture_space(brw->sf.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 2478872b82..24388b79a5 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -37,7 +37,7 @@ #include "macros.h" #include "intel_fbo.h" -static void upload_sf_vp(struct brw_context *brw) +static int upload_sf_vp(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; @@ -98,6 +98,8 @@ static void upload_sf_vp(struct brw_context *brw) dri_bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + + return dri_bufmgr_check_aperture_space(brw->sf.vp_bo); } const struct brw_tracked_state brw_sf_vp = { @@ -251,26 +253,27 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, NULL, NULL); /* Emit SF program relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); /* Emit SF viewport relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); return bo; } -static void upload_sf_unit( struct brw_context *brw ) +static int upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; dri_bo *reloc_bufs[2]; + int ret = 0; sf_unit_populate_key(brw, &key); @@ -285,6 +288,15 @@ static void upload_sf_unit( struct brw_context *brw ) if (brw->sf.state_bo == NULL) { brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); } + + if (reloc_bufs[0]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]); + + if (reloc_bufs[1]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]); + + ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo); + return ret; } const struct brw_tracked_state brw_sf_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3ea6151ae9..d1fca051ec 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -80,9 +80,6 @@ const struct brw_tracked_state brw_pipe_control; const struct brw_tracked_state brw_clear_surface_cache; const struct brw_tracked_state brw_clear_batch_cache; -const struct brw_tracked_state brw_indices; -const struct brw_tracked_state brw_vertices; - /*********************************************************************** * brw_state_cache.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index fc0c3bd9ff..d617650fad 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -214,7 +214,10 @@ brw_upload_cache( struct brw_cache *cache, /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); + cache->name[cache_id], data_size, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); /* Set up the memory containing the key, aux_data, and reloc_bufs */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index d1d319d92e..3b2ccd48c3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -80,6 +80,7 @@ const struct brw_tracked_state *atoms[] = */ &brw_invarient_state, &brw_state_base_address, + &brw_pipe_control, &brw_binding_table_pointers, &brw_blend_constant_color, @@ -101,8 +102,6 @@ const struct brw_tracked_state *atoms[] = &brw_psp_urb_cbs, #endif - &brw_indices, - &brw_vertices, NULL, /* brw_constant_buffer */ }; @@ -174,12 +173,10 @@ static void xor_states( struct brw_state_flags *result, /*********************************************************************** * Emit all state: */ -void brw_validate_state( struct brw_context *brw ) +int brw_validate_state( struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; - GLuint i, count, pass = 0; - dri_bo *last_batch_bo = NULL; + GLuint i, ret, count; state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; @@ -205,7 +202,7 @@ void brw_validate_state( struct brw_context *brw ) if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return; + return 0; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache_flush(brw); @@ -223,23 +220,15 @@ void brw_validate_state( struct brw_context *brw ) if (check_state(state, &atom->dirty)) { if (atom->prepare) { - atom->prepare(brw); + ret = atom->prepare(brw); + if (ret) + return ret; } } } if (brw->intel.Fallback) - return; - - /* We're about to try to set up a coherent state in the batchbuffer for - * the emission of primitives. If we exceed the aperture size in any of the - * emit() calls, we need to go back to square 1 and try setting up again. - */ -got_flushed: - dri_bo_unreference(last_batch_bo); - last_batch_bo = intel->batch->buf; - dri_bo_reference(last_batch_bo); - assert(pass++ <= 2); + return 0; if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -262,11 +251,8 @@ got_flushed: break; if (check_state(state, &atom->dirty)) { - if (atom->emit) { + if (atom->emit) atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; - } } accumulate_state(&examined, &atom->dirty); @@ -288,17 +274,13 @@ got_flushed: break; if (check_state(state, &atom->dirty)) { - if (atom->emit) { + if (atom->emit) atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; - } } } } - dri_bo_unreference(last_batch_bo); - if (!brw->intel.Fallback) memset(state, 0, sizeof(*state)); + return 0; } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 1116ade0a4..c423dbe7d7 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -74,7 +74,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) /* Most minimal update, forces re-emit of URB fence packet after GS * unit turned on/off. */ -static void recalculate_urb_fence( struct brw_context *brw ) +static int recalculate_urb_fence( struct brw_context *brw ) { GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; @@ -142,6 +142,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } + return 0; } @@ -186,3 +187,15 @@ void brw_upload_urb_fence(struct brw_context *brw) BRW_BATCH_STRUCT(brw, &uf); } + + +#if 0 +const struct brw_tracked_state brw_urb_fence = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP, + .cache = 0 + }, + .update = brw_upload_urb_fence +}; +#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 1db7ceebcf..f89b0e14a1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -83,7 +83,7 @@ static void do_vs_prog( struct brw_context *brw, } -static void brw_upload_vs_prog(struct brw_context *brw) +static int brw_upload_vs_prog( struct brw_context *brw ) { struct brw_vs_prog_key key; struct brw_vertex_program *vp = @@ -115,6 +115,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + return dri_bufmgr_check_aperture_space(brw->vs.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 734a926e96..a0106b8975 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw, /* Calculate sizes of vertex program outputs. Size is the largest * component index which might vary from [0,0,0,1] */ -static void calc_wm_input_sizes( struct brw_context *brw ) +static int calc_wm_input_sizes( struct brw_context *brw ) { /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = @@ -210,6 +210,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; } + return 0; } const struct brw_tracked_state brw_wm_input_sizes = { diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 909b942610..2a64f3df33 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -115,16 +115,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) NULL, NULL); /* Emit VS program relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - vs.thread0.grf_reg_count << 1, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); return bo; } -static void prepare_vs_unit(struct brw_context *brw) +static int prepare_vs_unit( struct brw_context *brw ) { struct brw_vs_unit_key key; @@ -138,6 +138,7 @@ static void prepare_vs_unit(struct brw_context *brw) if (brw->vs.state_bo == NULL) { brw->vs.state_bo = vs_unit_create_from_key(brw, &key); } + return dri_bufmgr_check_aperture_space(brw->vs.state_bo); } const struct brw_tracked_state brw_vs_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index 2caa020e61..e409620bbf 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -1581,7 +1581,7 @@ static GLuint hash_key( struct state_key *key ) return hash; } -static void prepare_tnl_program( struct brw_context *brw ) +static int prepare_tnl_program( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct state_key key; @@ -1590,7 +1590,7 @@ static void prepare_tnl_program( struct brw_context *brw ) /* _NEW_PROGRAM */ if (brw->attribs.VertexProgram->_Current) - return; + return 0; /* Grab all the relevent state and put it in a single structure: */ @@ -1623,7 +1623,7 @@ static void prepare_tnl_program( struct brw_context *brw ) if (old != brw->tnl_program) brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM; - return; + return 0; } /* Note: See brw_draw.c - the vertex program must not rely on @@ -1649,7 +1649,7 @@ const struct brw_tracked_state brw_tnl_vertprog = { -static void prepare_active_vertprog( struct brw_context *brw ) +static int prepare_active_vertprog( struct brw_context *brw ) { const struct gl_vertex_program *prev = brw->vertex_program; @@ -1664,6 +1664,8 @@ static void prepare_active_vertprog( struct brw_context *brw ) if (brw->vertex_program != prev) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + + return 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 361312c2ca..a470a25636 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -322,7 +322,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } -static void brw_prepare_wm_prog(struct brw_context *brw) +static int brw_prepare_wm_prog( struct brw_context *brw ) { struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) @@ -339,6 +339,8 @@ static void brw_prepare_wm_prog(struct brw_context *brw) &brw->wm.prog_data); if (brw->wm.prog_bo == NULL) do_wm_prog(brw, fp, &key); + + return dri_bufmgr_check_aperture_space(brw->wm.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 08d01823de..d40332e9ae 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -255,10 +255,11 @@ brw_wm_sampler_populate_key(struct brw_context *brw, * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -static void upload_wm_samplers( struct brw_context *brw ) +static int upload_wm_samplers( struct brw_context *brw ) { struct wm_sampler_key key; int i; + int ret = 0; brw_wm_sampler_populate_key(brw, &key); @@ -270,7 +271,7 @@ static void upload_wm_samplers( struct brw_context *brw ) dri_bo_unreference(brw->wm.sampler_bo); brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) - return; + return 0; brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), @@ -303,14 +304,19 @@ static void upload_wm_samplers( struct brw_context *brw ) if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) continue; - intel_bo_emit_reloc(brw->wm.sampler_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i]); + ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); + dri_emit_reloc(brw->wm.sampler_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); } } + + ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo); + return ret; + } const struct brw_tracked_state brw_wm_samplers = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index f97d0dc285..f4da0f279e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -199,39 +199,40 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, NULL, NULL); /* Emit WM program relocation */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); /* Emit scratch space relocation */ if (key->total_scratch != 0) { - intel_bo_emit_reloc(bo, - 0, 0, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); } /* Emit sampler state relocation */ if (key->sampler_count != 0) { - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); } return bo; } -static void upload_wm_unit( struct brw_context *brw ) +static int upload_wm_unit( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_wm_unit_key key; dri_bo *reloc_bufs[3]; + int ret = 0, i; wm_unit_populate_key(brw, &key); /* Allocate the necessary scratch space if we haven't already. Don't @@ -250,7 +251,7 @@ static void upload_wm_unit( struct brw_context *brw ) brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, "wm scratch", total, - 4096); + 4096, DRM_BO_FLAG_MEM_TT); } } @@ -266,6 +267,12 @@ static void upload_wm_unit( struct brw_context *brw ) if (brw->wm.state_bo == NULL) { brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); } + + for (i = 0; i < 3; i++) + if (reloc_bufs[i]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]); + ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo); + return ret; } const struct brw_tracked_state brw_wm_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index af1401b34b..2ba3eb4de8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -154,29 +154,10 @@ struct brw_wm_surface_key { GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; - uint32_t tiling; + GLboolean tiled; GLuint offset; }; -static void -brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - surf->ss3.tiled_surface = 0; - surf->ss3.tile_walk = 0; - break; - case I915_TILING_X: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; - break; - } -} - static dri_bo * brw_create_texture_surface( struct brw_context *brw, struct brw_wm_surface_key *key ) @@ -211,7 +192,9 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = key->width - 1; surf.ss2.height = key->height - 1; - brw_set_surface_tiling(&surf, key->tiling); + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = key->tiled; surf.ss3.pitch = (key->pitch * key->cpp) - 1; surf.ss3.depth = key->depth - 1; @@ -231,19 +214,18 @@ brw_create_texture_surface( struct brw_context *brw, &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), NULL, NULL); - if (key->bo) { /* Emit relocation to surface contents */ - intel_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); } return bo; } -static void +static int brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { struct brw_context *brw = brw_context(ctx); @@ -251,6 +233,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_wm_surface_key key; + int ret = 0; memset(&key, 0, sizeof(key)); @@ -265,6 +248,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; key.offset = 0; + ret |= dri_bufmgr_check_aperture_space(key.bo); } key.target = tObj->Target; @@ -274,8 +258,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.width = firstImage->Width; key.height = firstImage->Height; key.cpp = intelObj->mt->cpp; - key.depth = firstImage->Depth; - key.tiling = intelObj->mt->region->tiling; + key.tiled = intelObj->mt->region->tiled; dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, @@ -285,6 +268,9 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); } + + ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); + return ret; } /** @@ -292,18 +278,18 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ -static void +static int brw_update_region_surface(struct brw_context *brw, struct intel_region *region, unsigned int unit, GLboolean cached) { dri_bo *region_bo = NULL; + int ret = 0; struct { unsigned int surface_type; unsigned int surface_format; unsigned int width, height, cpp; GLubyte color_mask[4]; - GLboolean color_blend; - uint32_t tiling; + GLboolean tiled, color_blend; } key; memset(&key, 0, sizeof(key)); @@ -316,14 +302,16 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; else key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - key.tiling = region->tiling; + key.tiled = region->tiled; key.width = region->pitch; /* XXX: not really! */ key.height = region->height; key.cpp = region->cpp; + + ret |= dri_bufmgr_check_aperture_space(region->buffer); } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = 0; + key.tiled = 0; key.width = 1; key.height = 1; key.cpp = 4; @@ -353,7 +341,8 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = key.tiled; surf.ss3.pitch = (key.width * key.cpp) - 1; /* _NEW_COLOR */ @@ -370,19 +359,19 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, &surf, sizeof(surf), NULL, NULL); if (region_bo != NULL) { - /* We might sample from it, and we might render to it, so flag - * them both. We might be able to figure out from other state - * a more restrictive relocation to emit. - */ - intel_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_SAMPLER, - I915_GEM_DOMAIN_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + dri_emit_reloc(brw->wm.surf_bo[unit], + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + offsetof(struct brw_surface_state, ss1), + region_bo); } } + + ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]); + + return ret; } @@ -420,11 +409,13 @@ brw_wm_get_binding_table(struct brw_context *brw) /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { if (brw->wm.surf_bo[i] != NULL) { - intel_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); + dri_emit_reloc(bind_bo, + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); } } @@ -434,19 +425,23 @@ brw_wm_get_binding_table(struct brw_context *brw) return bind_bo; } -static void prepare_wm_surfaces(struct brw_context *brw ) +static int prepare_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - GLuint i; + GLuint i, ret; if (brw->state.nr_draw_regions > 1) { for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, - GL_FALSE); + ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i, + GL_FALSE); + if (ret) + return ret; } }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + if (ret) + return ret; } brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; @@ -462,8 +457,11 @@ static void prepare_wm_surfaces(struct brw_context *brw ) dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; } else { - brw_update_texture_surface(ctx, i); + ret = brw_update_texture_surface(ctx, i); brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + + if (ret) + return ret; } } else { dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); @@ -474,6 +472,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); + + return dri_bufmgr_check_aperture_space(brw->wm.bind_bo); } diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c new file mode 120000 index 0000000000..e9df5c6279 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_ttm.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 5afaad070c..a594fb6cc4 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -29,7 +29,6 @@ #include "intel_ioctl.h" #include "intel_decode.h" #include "intel_reg.h" -#include "intel_bufmgr.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -79,21 +78,19 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } - if (!batch->buffer && intel->ttm == GL_TRUE) - batch->buffer = malloc (intel->maxBatchSize); - batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096); - if (batch->buffer) - batch->map = batch->buffer; - else { - dri_bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; - } + intel->maxBatchSize, 4096, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; batch->cliprect_mode = IGNORE_CLIPRECTS; + + /* account batchbuffer in aperture */ + dri_bufmgr_check_aperture_space(batch->buf); + } struct intel_batchbuffer * @@ -102,6 +99,7 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; + batch->last_fence = NULL; intel_batchbuffer_reset(batch); return batch; @@ -110,13 +108,14 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->buffer) - free (batch->buffer); - else { - if (batch->map) { - dri_bo_unmap(batch->buf); - batch->map = NULL; - } + if (batch->last_fence) { + dri_fence_wait(batch->last_fence); + dri_fence_unreference(batch->last_fence); + batch->last_fence = NULL; + } + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; } dri_bo_unreference(batch->buf); batch->buf = NULL; @@ -132,12 +131,11 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - int ret = 0; + void *start; + GLuint count; - if (batch->buffer) - dri_bo_subdata (batch->buf, 0, used, batch->buffer); - else - dri_bo_unmap(batch->buf); + dri_bo_unmap(batch->buf); + start = dri_process_relocs(batch->buf, &count); batch->map = NULL; batch->ptr = NULL; @@ -150,25 +148,21 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { - struct drm_i915_gem_execbuffer *execbuf; - - execbuf = dri_process_relocs(batch->buf); - ret = intel_exec_ioctl(batch->intel, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock, - execbuf); + intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + start, count, &batch->last_fence); } else { - dri_process_relocs(batch->buf); - ret = intel_batch_ioctl(batch->intel, - batch->buf->offset, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock); + intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); } } - - dri_post_submit(batch->buf); + + dri_post_submit(batch->buf, &batch->last_fence); if (intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS) { @@ -193,10 +187,6 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.debug_batch(intel); } - if (ret != 0) { - UNLOCK_HARDWARE(intel); - exit(1); - } intel->vtbl.new_batch(intel); } @@ -214,27 +204,21 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); - - /* Emit a flush if the bufmgr doesn't do it for us. */ - if (!intel->ttm) { - *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); - batch->ptr += 4; - used = batch->ptr - batch->map; + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ + if (used & 4) { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = 0; + ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; + used += 12; } - - /* Round batchbuffer usage to 2 DWORDs. */ - - if ((used & 4) == 0) { - *(GLuint *) (batch->ptr) = 0; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; + else { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; + used += 8; } - /* Mark the end of the buffer. */ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; - /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a * flush. What happens is that when we try to grab the lock for @@ -246,9 +230,6 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, * avoid that in the first place. */ batch->ptr = batch->map; - if (intel->vtbl.finish_batch) - intel->vtbl.finish_batch(intel); - /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ @@ -261,13 +242,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { - int irq; - fprintf(stderr, "waiting for idle\n"); - LOCK_HARDWARE(intel); - irq = intelEmitIrqLocked(intel); - UNLOCK_HARDWARE(intel); - intelWaitIrq(intel, irq); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); } /* Reset the buffer: @@ -275,22 +252,25 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, intel_batchbuffer_reset(batch); } +void +intel_batchbuffer_finish(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_flush(batch); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); +} + /* This is the only way buffers get added to the validate list. */ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta) + GLuint flags, GLuint delta) { int ret; - if (batch->ptr - batch->map > batch->buf->size) - _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain, - delta, batch->ptr - batch->map, buffer); + ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 52d6ecc223..0da602010e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -4,7 +4,6 @@ #include "mtypes.h" #include "dri_bufmgr.h" -#include "intel_reg.h" struct intel_context; @@ -41,8 +40,7 @@ struct intel_batchbuffer struct intel_context *intel; dri_bo *buf; - - GLubyte *buffer; + dri_fence *last_fence; GLubyte *map; GLubyte *ptr; @@ -60,6 +58,8 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context void intel_batchbuffer_free(struct intel_batchbuffer *batch); +void intel_batchbuffer_finish(struct intel_batchbuffer *batch); + void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line); @@ -82,16 +82,14 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); + GLuint flags, GLuint offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of * work... */ -static INLINE GLint +static INLINE GLuint intel_batchbuffer_space(struct intel_batchbuffer *batch) { return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); @@ -138,20 +136,12 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ +#define OUT_RELOC(buf, cliprect_mode, delta) do { \ assert((delta) >= 0); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ - read_domains, write_domain, delta); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ } while (0) #define ADVANCE_BATCH() do { } while(0) -static INLINE void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); - intel_batchbuffer_emit_dword(batch, MI_FLUSH); -} - #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 7129a4ba91..25ac609f13 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -54,6 +54,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, struct intel_context *intel; const intelScreenPrivate *intelScreen; + int ret; DBG("%s\n", __FUNCTION__); @@ -65,6 +66,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, intelScreen = intel->intelScreen; + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. */ @@ -80,7 +89,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, unsigned short src_x, src_y; int BR13, CMD; int i; - dri_bo *aper_array[3]; src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); @@ -106,28 +114,26 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #ifndef I915 - if (src->tiling != I915_TILING_NONE) { + if (src->tiled) { CMD |= XY_SRC_TILED; src_pitch /= 4; } - if (dst->tiling != I915_TILING_NONE) { + if (dst->tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } #endif /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, - REFERENCES_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS); again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst->buffer; - aper_array[2] = src->buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + ret = dri_bufmgr_check_aperture_space(dst->buffer); + ret |= dri_bufmgr_check_aperture_space(src->buffer); + + if (ret) { intel_batchbuffer_flush(intel->batch); goto again; } - + for (i = 0; i < nbox; i++, pbox++) { drm_clip_rect_t box = *pbox; @@ -151,22 +157,19 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, - I915_GEM_DOMAIN_RENDER, 0, - 0); + OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); ADVANCE_BATCH(); } - /* Flush the rendering and the batch so that the results all land on the - * screen in a timely fashion. - */ - intel_batchbuffer_emit_mi_flush(intel->batch); + if (intel->first_swap_fence) + dri_fence_unreference(intel->first_swap_fence); intel_batchbuffer_flush(intel->batch); + intel->first_swap_fence = intel->batch->last_fence; + if (intel->first_swap_fence) + dri_fence_reference(intel->first_swap_fence); } UNLOCK_HARDWARE(intel); @@ -181,7 +184,7 @@ intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color) @@ -206,7 +209,7 @@ intelEmitFillBlit(struct intel_context *intel, return; } #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } @@ -223,9 +226,7 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); } @@ -262,11 +263,11 @@ intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - uint32_t src_tiling, + GLboolean src_tiled, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -275,19 +276,17 @@ intelEmitCopyBlit(struct intel_context *intel, GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - dri_bo *aper_array[3]; + int ret; BATCH_LOCALS; /* do space/cliprects check before going any further */ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(intel->batch); - goto again; + ret = dri_bufmgr_check_aperture_space(dst_buffer); + ret |= dri_bufmgr_check_aperture_space(src_buffer); + if (ret) { + intel_batchbuffer_flush(intel->batch); + goto again; } DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", @@ -316,11 +315,11 @@ intelEmitCopyBlit(struct intel_context *intel, } #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } - if (src_tiling != I915_TILING_NONE) { + if (src_tiled) { CMD |= XY_SRC_TILED; src_pitch /= 4; } @@ -346,13 +345,11 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset); ADVANCE_BATCH(); } @@ -365,17 +362,14 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } - intel_batchbuffer_emit_mi_flush(intel->batch); } @@ -519,7 +513,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiling != I915_TILING_NONE) { + if (irb_region->tiled) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -547,8 +541,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -556,7 +549,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } } - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -570,7 +563,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op) @@ -594,13 +587,13 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, (8 * 4) + (3 * 4) + dwords, - REFERENCES_CLIPRECTS ); + NO_LOOP_CLIPRECTS ); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { opcode |= XY_DST_TILED; dst_pitch /= 4; } @@ -613,17 +606,15 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, br13 |= BR13_8888; blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiling != I915_TILING_NONE) + if (dst_tiled) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); + BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ @@ -636,7 +627,5 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_data( intel->batch, src_bits, dwords * 4, - REFERENCES_CLIPRECTS ); - - intel_batchbuffer_emit_mi_flush(intel->batch); + NO_LOOP_CLIPRECTS ); } diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 0881cc4fdc..fc0620caba 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -42,11 +42,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - uint32_t src_tiling, + GLboolean src_tiled, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, GLshort w, GLshort h, @@ -57,7 +57,7 @@ extern void intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color); @@ -69,7 +69,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 1923a21516..951b8cbfb7 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -32,7 +32,6 @@ #include "intel_context.h" #include "intel_buffer_objects.h" -#include "intel_batchbuffer.h" #include "intel_regions.h" #include "dri_bufmgr.h" @@ -46,7 +45,8 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, struct intel_buffer_object *intel_obj) { intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64); + intel_obj->Base.Size, 64, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); } /** diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 77352e6a53..75542a9775 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -819,8 +819,6 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } - drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); - } else { /* XXX this shouldn't be an error but we can't handle it for now */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c new file mode 100644 index 0000000000..194814e8fb --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -0,0 +1,1122 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +#include <xf86drm.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> + +#include "errno.h" +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "string.h" +#include "imports.h" + +#include "i915_drm.h" + +#include "intel_bufmgr_ttm.h" +#ifdef TTM_API + +#define DBG(...) do { \ + if (bufmgr_ttm->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +/* + * These bits are always specified in each validation + * request. Other bits are not supported at this point + * as it would require a bit of investigation to figure + * out what mask value should be used. + */ +#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ + DRM_BO_FLAG_READ | \ + DRM_BO_FLAG_WRITE | \ + DRM_BO_FLAG_EXE) + +struct intel_validate_entry { + dri_bo *bo; + struct drm_i915_op_arg bo_arg; +}; + +struct dri_ttm_bo_bucket_entry { + drmBO drm_bo; + struct dri_ttm_bo_bucket_entry *next; +}; + +struct dri_ttm_bo_bucket { + struct dri_ttm_bo_bucket_entry *head; + struct dri_ttm_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_TTM_BO_BUCKETS 16 +typedef struct _dri_bufmgr_ttm { + dri_bufmgr bufmgr; + + int fd; + unsigned int fence_type; + unsigned int fence_type_flush; + + uint32_t max_relocs; + + struct intel_validate_entry *validate_array; + int validate_array_size; + int validate_count; + + /** Array of lists of cached drmBOs of power-of-two sizes */ + struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; +} dri_bufmgr_ttm; + +/** + * Private information associated with a relocation that isn't already stored + * in the relocation buffer to be passed to the kernel. + */ +struct dri_ttm_reloc { + dri_bo *target_buf; + uint64_t validate_flags; + /** Offset of target_buf after last execution of this relocation entry. */ + unsigned int last_target_offset; +}; + +typedef struct _dri_bo_ttm { + dri_bo bo; + + int refcount; + unsigned int map_count; + drmBO drm_bo; + const char *name; + + uint64_t last_flags; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** DRM buffer object containing relocation list */ + uint32_t *reloc_buf_data; + struct dri_ttm_reloc *relocs; + + /** + * Indicates that the buffer may be shared with other processes, so we + * can't hold maps beyond when the user does. + */ + GLboolean shared; + + GLboolean delayed_unmap; + /* Virtual address from the dri_bo_map whose unmap was delayed. */ + void *saved_virtual; +} dri_bo_ttm; + +typedef struct _dri_fence_ttm +{ + dri_fence fence; + + int refcount; + const char *name; + drmFence drm_fence; +} dri_fence_ttm; + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_ttm_bo_bucket * +dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_TTM_BO_BUCKETS) + return NULL; + + return &bufmgr_ttm->cache_bucket[i]; +} + + +static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) +{ + int i, j; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + if (bo_ttm->reloc_buf_data != NULL) { + for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { + uint32_t *reloc_entry = bo_ttm->reloc_buf_data + + I915_RELOC_HEADER + + j * I915_RELOC0_STRIDE; + dri_bo *target_bo = bo_ttm->relocs[j].target_buf; + dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; + + DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", + i, + bo_ttm->name, reloc_entry[0], + target_ttm->name, target_bo->offset, + reloc_entry[1]); + } + } else { + DBG("%2d: %s\n", i, bo_ttm->name); + } + } +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +intel_add_validate_buffer(dri_bo *buf, + uint64_t flags) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, + * do that now. + */ + if (ttm_buf->delayed_unmap) { + drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + ttm_buf->delayed_unmap = GL_FALSE; + } + + if (ttm_buf->validate_index == -1) { + struct intel_validate_entry *entry; + struct drm_i915_op_arg *arg; + struct drm_bo_op_req *req; + int index; + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { + int i, new_size = bufmgr_ttm->validate_array_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_ttm->validate_array = + realloc(bufmgr_ttm->validate_array, + sizeof(struct intel_validate_entry) * new_size); + bufmgr_ttm->validate_array_size = new_size; + + /* Update pointers for realloced mem. */ + for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { + bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) + &bufmgr_ttm->validate_array[i + 1].bo_arg; + } + } + + /* Pick out the new array entry for ourselves */ + index = bufmgr_ttm->validate_count; + ttm_buf->validate_index = index; + entry = &bufmgr_ttm->validate_array[index]; + bufmgr_ttm->validate_count++; + + /* Fill in array entry */ + entry->bo = buf; + dri_bo_reference(buf); + + /* Fill in kernel arg */ + arg = &entry->bo_arg; + req = &arg->d.req; + + memset(arg, 0, sizeof(*arg)); + req->bo_req.handle = ttm_buf->drm_bo.handle; + req->op = drm_bo_validate; + req->bo_req.flags = flags; + req->bo_req.hint = 0; +#ifdef DRM_BO_HINT_PRESUMED_OFFSET + /* PRESUMED_OFFSET indicates that all relocations pointing at this + * buffer have the correct offset. If any of our relocations don't, + * this flag will be cleared off the buffer later in the relocation + * processing. + */ + req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; + req->bo_req.presumed_offset = buf->offset; +#endif + req->bo_req.mask = INTEL_BO_MASK; + req->bo_req.fence_class = 0; /* Backwards compat. */ + + if (ttm_buf->reloc_buf_data != NULL) + arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; + else + arg->reloc_ptr = 0; + + /* Hook up the linked list of args for the kernel */ + arg->next = 0; + if (index != 0) { + bufmgr_ttm->validate_array[index - 1].bo_arg.next = + (unsigned long)arg; + } + } else { + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[ttm_buf->validate_index]; + struct drm_i915_op_arg *arg = &entry->bo_arg; + struct drm_bo_op_req *req = &arg->d.req; + uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; + uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; + + /* Buffer was already in the validate list. Extend its flags as + * necessary. + */ + + if (memFlags == 0) { + fprintf(stderr, + "%s: No shared memory types between " + "0x%16llx and 0x%16llx\n", + __FUNCTION__, req->bo_req.flags, flags); + abort(); + } + if (flags & ~INTEL_BO_MASK) { + fprintf(stderr, + "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", + __FUNCTION__, flags & ~INTEL_BO_MASK); + abort(); + } + req->bo_req.flags = memFlags | modeFlags; + } +} + + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static int +intel_setup_reloc_list(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + + bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, + sizeof(struct dri_ttm_reloc)); + bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); + + /* Initialize the relocation list with the header: + * DWORD 0: relocation count + * DWORD 1: relocation type + * DWORD 2+3: handle to next relocation list (currently none) 64-bits + */ + bo_ttm->reloc_buf_data[0] = 0; + bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; + bo_ttm->reloc_buf_data[2] = 0; + bo_ttm->reloc_buf_data[3] = 0; + + return 0; +} + +#if 0 +int +driFenceSignaled(DriFenceObject * fence, unsigned type) +{ + int signaled; + int ret; + + if (fence == NULL) + return GL_TRUE; + + ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); + BM_CKFATAL(ret); + return signaled; +} +#endif + +static dri_bo * +dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + unsigned int pageSize = getpagesize(); + int ret; + uint64_t flags; + unsigned int hint; + unsigned long alloc_size; + struct dri_ttm_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + /* The mask argument doesn't do anything for us that we want other than + * determine which pool (TTM or local) the buffer is allocated into, so + * just pass all of the allocation class flags. + */ + flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_EXE; + /* No hints we want to use. */ + hint = 0; + + /* Round the allocated size up to a power of two number of pages. */ + alloc_size = 1 << logbase2(size); + if (alloc_size < pageSize) + alloc_size = pageSize; + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) + alloc_size = size; + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_ttm_bo_bucket_entry *entry = bucket->head; + int busy; + + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + ttm_buf->drm_bo = entry->drm_bo; + free(entry); + } + } + + if (!alloc_from_cache) { + ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, + NULL, flags, hint, &ttm_buf->drm_bo); + if (ret != 0) { + free(ttm_buf); + return NULL; + } + } + + ttm_buf->bo.size = size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_FALSE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); + + return &ttm_buf->bo; +} + +/* Our TTM backend doesn't allow creation of static buffers, as that requires + * privelege for the non-fake case, and the lock in the fake case where we were + * working around the X Server not creating buffers and passing handles to us. + */ +static dri_bo * +dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) +{ + return NULL; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + int ret; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(-ret)); + free(ttm_buf); + return NULL; + } + ttm_buf->bo.size = ttm_buf->drm_bo.size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_TRUE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create_from_handle: %p %08x (%s)\n", + &ttm_buf->bo, handle, ttm_buf->name); + + return &ttm_buf->bo; +} + +static void +dri_ttm_bo_reference(dri_bo *buf) +{ + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + ttm_buf->refcount++; +} + +static void +dri_ttm_bo_unreference(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + if (!buf) + return; + + if (--ttm_buf->refcount == 0) { + struct dri_ttm_bo_bucket *bucket; + int ret; + + assert(ttm_buf->map_count == 0); + + if (ttm_buf->reloc_buf_data) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) + dri_bo_unreference(ttm_buf->relocs[i].target_buf); + free(ttm_buf->relocs); + + /* Free the kernel BO containing relocation entries */ + free(ttm_buf->reloc_buf_data); + ttm_buf->reloc_buf_data = NULL; + } + + if (ttm_buf->delayed_unmap) { + int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + } + + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (!ttm_buf->shared && + bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_ttm_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->drm_bo = ttm_buf->drm_bo; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed (%s): %s\n", + ttm_buf->name, strerror(-ret)); + } + } + + DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + free(buf); + return; + } +} + +static int +dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + uint64_t flags; + int ret; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + flags = DRM_BO_FLAG_READ; + if (write_enable) + flags |= DRM_BO_FLAG_WRITE; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops. + */ + if (ttm_buf->map_count++ != 0) + return 0; + + assert(buf->virtual == NULL); + + DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + /* XXX: What about if we're upgrading from READ to WRITE? */ + if (ttm_buf->delayed_unmap) { + buf->virtual = ttm_buf->saved_virtual; + return 0; + } + + ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +static int +dri_ttm_bo_unmap(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + int ret; + + if (buf == NULL) + return 0; + + assert(ttm_buf->map_count != 0); + if (--ttm_buf->map_count != 0) + return 0; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + assert(buf->virtual != NULL); + + DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + if (!ttm_buf->shared) { + ttm_buf->saved_virtual = buf->virtual; + ttm_buf->delayed_unmap = GL_TRUE; + buf->virtual = NULL; + + return 0; + } + + buf->virtual = NULL; + + ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_fence * +intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_fence_ttm *ttm_fence; + + ttm_fence = malloc(sizeof(*ttm_fence)); + if (!ttm_fence) + return NULL; + + ttm_fence->drm_fence.handle = arg->handle; + ttm_fence->drm_fence.fence_class = arg->fence_class; + ttm_fence->drm_fence.type = arg->type; + ttm_fence->drm_fence.flags = arg->flags; + ttm_fence->drm_fence.signaled = 0; + ttm_fence->drm_fence.sequence = arg->sequence; + + ttm_fence->fence.bufmgr = bufmgr; + ttm_fence->name = name; + ttm_fence->refcount = 1; + + DBG("fence_create_from_handle: %p (%s)\n", + &ttm_fence->fence, ttm_fence->name); + + return &ttm_fence->fence; +} + + +static void +dri_ttm_fence_reference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + ++fence_ttm->refcount; + DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_ttm_fence_unreference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + if (!fence) + return; + + DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); + + if (--fence_ttm->refcount == 0) { + int ret; + + ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); + if (ret != 0) { + fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", + fence_ttm->name, strerror(-ret)); + } + + free(fence); + return; + } +} + +static void +dri_ttm_fence_wait(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + int ret; + + ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", + __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); + abort(); + } + + DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + free(bufmgr_ttm->validate_array); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; + struct dri_ttm_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_buf. + */ +static int +dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; + dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; + int num_relocs; + uint32_t *this_reloc; + + /* Create a new relocation list if needed */ + if (reloc_buf_ttm->reloc_buf_data == NULL) + intel_setup_reloc_list(reloc_buf); + + num_relocs = reloc_buf_ttm->reloc_buf_data[0]; + + /* Check overflow */ + assert(num_relocs < bufmgr_ttm->max_relocs); + + this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + + num_relocs * I915_RELOC0_STRIDE; + + this_reloc[0] = offset; + this_reloc[1] = delta; + this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ + this_reloc[3] = 0; + + reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; + reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; + dri_bo_reference(target_buf); + + reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ + /* Check wraparound */ + assert(reloc_buf_ttm->reloc_buf_data[0] != 0); + return 0; +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_ttm_bo_process_reloc(dri_bo *bo) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_process_reloc(r->target_buf); + + /* Add the target to the validate list */ + intel_add_validate_buffer(r->target_buf, r->validate_flags); + + /* Clear the PRESUMED_OFFSET flag from the validate list entry of the + * target if this buffer has a stale relocated pointer at it. + */ + if (r->last_target_offset != r->target_buf->offset) { + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; + + entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; + } + } +} + +static void * +dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + + /* Update indices and set up the validate list. */ + dri_ttm_bo_process_reloc(batch_buf); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + intel_add_validate_buffer(batch_buf, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + + *count = bufmgr_ttm->validate_count; + return &bufmgr_ttm->validate_array[0].bo_arg; +} + +static const char * +intel_get_flags_mem_type_string(uint64_t flags) +{ + switch (flags & DRM_BO_MASK_MEM) { + case DRM_BO_FLAG_MEM_LOCAL: return "local"; + case DRM_BO_FLAG_MEM_TT: return "ttm"; + case DRM_BO_FLAG_MEM_VRAM: return "vram"; + case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; + case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; + case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; + case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; + case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; + default: return NULL; + } +} + +static const char * +intel_get_flags_caching_string(uint64_t flags) +{ + switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { + case 0: return "UU"; + case DRM_BO_FLAG_CACHED: return "CU"; + case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; + case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; + default: return NULL; + } +} + +static void +intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) +{ + int i; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; + struct drm_bo_arg_rep *rep = &arg->d.rep; + + /* Update the flags */ + if (rep->bo_info.flags != bo_ttm->last_flags) { + DBG("BO %s migrated: %s/%s -> %s/%s\n", + bo_ttm->name, + intel_get_flags_mem_type_string(bo_ttm->last_flags), + intel_get_flags_caching_string(bo_ttm->last_flags), + intel_get_flags_mem_type_string(rep->bo_info.flags), + intel_get_flags_caching_string(rep->bo_info.flags)); + + bo_ttm->last_flags = rep->bo_info.flags; + } + /* Update the buffer offset */ + if (rep->bo_info.offset != bo->offset) { + DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", + bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); + bo->offset = rep->bo_info.offset; + } + } +} + +/** + * Update the last target offset field of relocation entries for PRESUMED_OFFSET + * computation. + */ +static void +dri_ttm_bo_post_submit(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_post_submit(r->target_buf); + + r->last_target_offset = r->target_buf->offset; + } +} + +static void +dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + int i; + + intel_update_buffer_offsets (bufmgr_ttm); + + dri_ttm_bo_post_submit(batch_buf); + + if (bufmgr_ttm->bufmgr.debug) + dri_ttm_dump_validation_list(bufmgr_ttm); + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + /* Disconnect the buffer from the validate list */ + bo_ttm->validate_index = -1; + dri_bo_unreference(bo); + bufmgr_ttm->validate_array[i].bo = NULL; + } + bufmgr_ttm->validate_count = 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + bufmgr_ttm->cache_bucket[i].max_entries = -1; + } +} + +/* + * + */ +static int +dri_ttm_check_aperture_space(dri_bo *bo) +{ + return 0; +} + +/** + * Initializes the TTM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + * \param fence_type Driver-specific fence type used for fences with no flush. + * \param fence_type_flush Driver-specific fence type used for fences with a + * flush. + */ +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + dri_bufmgr_ttm *bufmgr_ttm; + int i; + + bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); + bufmgr_ttm->fd = fd; + bufmgr_ttm->fence_type = fence_type; + bufmgr_ttm->fence_type_flush = fence_type_flush; + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; + bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; + bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; + bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; + bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; + bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; + bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; + bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; + bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; + bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; + bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; + bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; + bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; + bufmgr_ttm->bufmgr.debug = GL_FALSE; + bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) + bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; + + return &bufmgr_ttm->bufmgr; +} +#else +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + return NULL; +} + +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + return NULL; +} + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ +} +#endif diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h new file mode 100644 index 0000000000..f5bd64c90f --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h @@ -0,0 +1,28 @@ + +#ifndef INTEL_BUFMGR_TTM_H +#define INTEL_BUFMGR_TTM_H + +#include "dri_bufmgr.h" + +extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle); + +#ifdef TTM_API +dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg); +#endif + + +dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size); + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); + +#ifndef TTM_API +#define DRM_I915_FENCE_CLASS_ACCEL 0 +#define DRM_I915_FENCE_TYPE_RW 2 +#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000 +#endif + +#endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 50c1964d87..5fa9d95e11 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,7 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -96,13 +96,11 @@ int INTEL_DEBUG = (0); #include "extension_helper.h" -#define DRIVER_DATE "20080716" -#define DRIVER_DATE_GEM "GEM " DRIVER_DATE +#define DRIVER_DATE "20061102" static const GLubyte * intelGetString(GLcontext * ctx, GLenum name) { - const struct intel_context *const intel = intel_context(ctx); const char *chipset; static char buffer[128]; @@ -112,7 +110,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; case GL_RENDERER: - switch (intel->intelScreen->deviceID) { + switch (intel_context(ctx)->intelScreen->deviceID) { case PCI_CHIP_845_G: chipset = "Intel(R) 845G"; break; @@ -185,9 +183,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; } - (void) driGetRendererString(buffer, chipset, - (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE, - 0); + (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0); return (GLubyte *) buffer; default: @@ -371,34 +367,22 @@ intelFlush(GLcontext * ctx) if (!IS_965(intel->intelScreen->deviceID)) INTEL_FIREVERTICES(intel); - /* Emit a flush so that any frontbuffer rendering that might have occurred - * lands onscreen in a timely manner, even if the X Server doesn't trigger - * a flush for us. - */ - intel_batchbuffer_emit_mi_flush(intel->batch); - if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); + + /* XXX: Need to do an MI_FLUSH here. + */ } void intelFinish(GLcontext * ctx) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - int i; - + struct intel_context *intel = intel_context(ctx); intelFlush(ctx); - - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct intel_renderbuffer *irb; - - irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb->region) - dri_bo_wait_rendering(irb->region->buffer); - } - if (fb->_DepthBuffer) { - /* XXX: Wait on buffer idle */ + if (intel->batch->last_fence) { + dri_fence_wait(intel->batch->last_fence); + dri_fence_unreference(intel->batch->last_fence); + intel->batch->last_fence = NULL; } } @@ -464,32 +448,28 @@ static GLboolean intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; - GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; - int gem_kernel = 0; - GLboolean gem_supported; - struct drm_i915_getparam gp; - - gp.param = I915_PARAM_HAS_GEM; - gp.value = &gem_kernel; + GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; + GLboolean ttm_supported; - (void) drmCommandWriteRead(intel->driFd, DRM_I915_GETPARAM, &gp, sizeof(gp)); - - /* If we've got a new enough DDX that's initializing GEM and giving us + /* If we've got a new enough DDX that's initializing TTM and giving us * object handles for the shared buffers, use that. */ intel->ttm = GL_FALSE; if (intel->intelScreen->driScrnPriv->dri2.enabled) - gem_supported = GL_TRUE; + ttm_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && - gem_kernel && + intel->intelScreen->drmMinor >= 11 && intel->intelScreen->front.bo_handle != -1) - gem_supported = GL_TRUE; + ttm_supported = GL_TRUE; else - gem_supported = GL_FALSE; + ttm_supported = GL_FALSE; - if (!gem_disable && gem_supported) { + if (!ttm_disable && ttm_supported) { int bo_reuse_mode; - intel->bufmgr = intel_bufmgr_gem_init(intel->driFd, + intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, + DRM_FENCE_TYPE_EXE, + DRM_FENCE_TYPE_EXE | + DRM_I915_FENCE_TYPE_RW, BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; @@ -499,16 +479,16 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_bufmgr_gem_enable_reuse(intel->bufmgr); + intel_ttm_enable_bo_reuse(intel->bufmgr); break; } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { - if (gem_disable) { - fprintf(stderr, "GEM disabled. Using classic.\n"); + if (ttm_disable) { + fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); } else { - fprintf(stderr, "Failed to initialize GEM. " + fprintf(stderr, "Failed to initialize TTM buffer manager. " "Falling back to classic.\n"); } @@ -518,17 +498,14 @@ intel_init_bufmgr(struct intel_context *intel) return GL_FALSE; } - intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - intel_fence_emit, - intel_fence_wait, - intel); + intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); } - /* XXX bufmgr should be per-screen, not per-context */ - intelScreen->ttm = intel->ttm; - return GL_TRUE; } @@ -696,6 +673,8 @@ intelInitContext(struct intel_context *intel, intel_recreate_static_regions(intel); intel->batch = intel_batchbuffer_alloc(intel); + intel->last_swap_fence = NULL; + intel->first_swap_fence = NULL; intel_bufferobj_init(intel); intel_fbo_init(intel); @@ -713,6 +692,7 @@ intelInitContext(struct intel_context *intel, /* Force all software fallbacks */ if (driQueryOptionb(&intel->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D rasterization\n"); + FALLBACK(intel, INTEL_FALLBACK_USER, 1); intel->no_rast = 1; } @@ -747,7 +727,17 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); - free(intel->prim.vb); + + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + if (intel->first_swap_fence) { + dri_fence_wait(intel->first_swap_fence); + dri_fence_unreference(intel->first_swap_fence); + intel->first_swap_fence = NULL; + } if (release_texture_heaps) { /* This share group is about to go away, free our private @@ -899,7 +889,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags) */ if (!intel->ttm && sarea->texAge != intel->hHWContext) { sarea->texAge = intel->hHWContext; - intel_bufmgr_fake_contended_lock_take(intel->bufmgr); + dri_bufmgr_fake_contended_lock_take(intel->bufmgr); if (INTEL_DEBUG & DEBUG_BATCH) intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) @@ -1027,7 +1017,6 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) * Nothing should be left in batch outside of LOCK/UNLOCK which references * cliprects. */ - if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS) - intel_batchbuffer_flush(intel->batch); + assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS); } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f9a373cf74..df79ab8897 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -35,7 +35,6 @@ #include "mm.h" #include "texmem.h" #include "dri_bufmgr.h" -#include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" @@ -86,7 +85,6 @@ struct intel_context { void (*destroy) (struct intel_context * intel); void (*emit_state) (struct intel_context * intel); - void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -176,6 +174,9 @@ struct intel_context */ GLboolean ttm; + dri_fence *last_swap_fence; + dri_fence *first_swap_fence; + struct intel_batchbuffer *batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -183,13 +184,9 @@ struct intel_context struct { GLuint id; - uint32_t primitive; /**< Current hardware primitive type */ + GLuint primitive; + GLubyte *start_ptr; void (*flush) (struct intel_context *); - dri_bo *vb_bo; - uint8_t *vb; - unsigned int start_offset; /**< Byte offset of primitive sequence */ - unsigned int current_offset; /**< Byte offset of next vertex */ - unsigned int count; /**< Number of vertices in current primitive */ } prim; GLuint stats_wm; @@ -294,7 +291,6 @@ extern char *__progname; #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define INTEL_FIREVERTICES(intel) \ diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 9c105013c0..a1240639f4 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -183,10 +183,9 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) switch ((data[0] & 0x1fc00000) >> 22) { case 0x50: instr_out(data, hw_offset, 0, - "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis", - (data[0] >> 11) & 1); + (data[0] & (1 << 21)) ? "en" : "dis"); len = (data[0] & 0x000000ff) + 2; if (len != 6) @@ -211,8 +210,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, pitch %d, " "clipping %sabled\n", format, - (short)(data[1] & 0xffff), - data[1] & (1 << 30) ? "en" : "dis"); + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "(%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "(%d,%d)\n", @@ -222,12 +220,9 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x53: instr_out(data, hw_offset, 0, - "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " - "src tile %d, dst tile %d)\n", + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis", - (data[0] >> 15) & 1, - (data[0] >> 11) & 1); + (data[0] & (1 << 21)) ? "en" : "dis"); len = (data[0] & 0x000000ff) + 2; if (len != 8) @@ -252,17 +247,16 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " "clipping %sabled\n", format, - (short)(data[1] & 0xffff), - data[1] & (1 << 30) ? "en" : "dis"); + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "dst (%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "dst (%d,%d)\n", - data[3] & 0xffff, data[3] >> 16); + data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); instr_out(data, hw_offset, 5, "src (%d,%d)\n", data[5] & 0xffff, data[5] >> 16); instr_out(data, hw_offset, 6, "src pitch %d\n", - (short)(data[6] & 0xffff)); + data[6] & 0xffff); instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); return len; } diff --git a/src/mesa/drivers/dri/intel/intel_depthstencil.c b/src/mesa/drivers/dri/intel/intel_depthstencil.c index 70ba68e9e3..90baecd8c2 100644 --- a/src/mesa/drivers/dri/intel/intel_depthstencil.c +++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c @@ -39,7 +39,7 @@ #include "intel_fbo.h" #include "intel_depthstencil.h" #include "intel_regions.h" -#include "intel_span.h" + /** * The GL_EXT_framebuffer_object allows the user to create their own @@ -86,33 +86,68 @@ * */ + + +static void +map_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_map(intel, depthRb->region); + depthRb->pfMap = depthRb->region->map; + depthRb->pfPitch = depthRb->region->pitch; + } + if (stencilRb && stencilRb->region) { + intel_region_map(intel, stencilRb->region); + stencilRb->pfMap = stencilRb->region->map; + stencilRb->pfPitch = stencilRb->region->pitch; + } +} + +static void +unmap_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_unmap(intel, depthRb->region); + depthRb->pfMap = NULL; + depthRb->pfPitch = 0; + } + if (stencilRb && stencilRb->region) { + intel_region_unmap(intel, stencilRb->region); + stencilRb->pfMap = NULL; + stencilRb->pfPitch = 0; + } +} + + + /** * Undo the pairing/interleaving between depth and stencil buffers. * irb should be a depth/stencil or stencil renderbuffer. */ void -intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) +intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) { - struct intel_context *intel = intel_context(ctx); - struct gl_renderbuffer *rb = &irb->Base; - if (irb->PairedStencil) { /* irb is a depth/stencil buffer */ struct gl_renderbuffer *stencilRb; struct intel_renderbuffer *stencilIrb; - ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); stencilIrb = intel_renderbuffer(stencilRb); if (stencilIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(stencilIrb->PairedDepth == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, stencilRb); - _mesa_extract_stencil(ctx, rb, stencilRb); - intel_renderbuffer_unmap(intel, stencilRb); - intel_renderbuffer_unmap(intel, rb); + ASSERT(stencilIrb->PairedDepth == irb->Base.Name); + map_regions(ctx, irb, stencilIrb); + _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base); + unmap_regions(ctx, irb, stencilIrb); stencilIrb->PairedDepth = 0; } irb->PairedStencil = 0; @@ -122,19 +157,17 @@ intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) struct gl_renderbuffer *depthRb; struct intel_renderbuffer *depthIrb; - ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT || - rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || + irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); depthIrb = intel_renderbuffer(depthRb); if (depthIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(depthIrb->PairedStencil == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, depthRb); - _mesa_extract_stencil(ctx, depthRb, rb); - intel_renderbuffer_unmap(intel, depthRb); - intel_renderbuffer_unmap(intel, rb); + ASSERT(depthIrb->PairedStencil == irb->Base.Name); + map_regions(ctx, depthIrb, irb); + _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base); + unmap_regions(ctx, depthIrb, irb); depthIrb->PairedStencil = 0; } irb->PairedDepth = 0; @@ -161,7 +194,6 @@ void intel_validate_paired_depth_stencil(GLcontext * ctx, struct gl_framebuffer *fb) { - struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *depthRb, *stencilRb; depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); @@ -198,11 +230,9 @@ intel_validate_paired_depth_stencil(GLcontext * ctx, stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); /* establish new pairing: interleave stencil into depth buffer */ - intel_renderbuffer_map(intel, &depthRb->Base); - intel_renderbuffer_map(intel, &stencilRb->Base); + map_regions(ctx, depthRb, stencilRb); _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &depthRb->Base); + unmap_regions(ctx, depthRb, stencilRb); depthRb->PairedStencil = stencilRb->Base.Name; stencilRb->PairedDepth = depthRb->Base.Name; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 5bd2ebfdcf..b3f6610546 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -153,9 +153,6 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) intel_unpair_depth_stencil(ctx, irb); } - if (irb->span_cache != NULL) - _mesa_free(irb->span_cache); - if (intel && irb->region) { intel_region_release(&irb->region); } @@ -212,14 +209,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10: case GL_RGB12: case GL_RGB16: - rb->_ActualFormat = GL_RGB8; - rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 0; - cpp = 4; - break; case GL_RGBA: case GL_RGBA2: case GL_RGBA4: @@ -305,6 +294,9 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Width = width; rb->Height = height; + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); + return GL_TRUE; } } @@ -374,6 +366,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, intel_region_reference(&rb->region, region); intel_region_release(&old); + rb->pfMap = region->map; rb->pfPitch = region->pitch; } @@ -453,6 +446,8 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); return irb; } @@ -524,7 +519,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage) { if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; @@ -563,6 +558,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; + intel_set_span_functions(&irb->Base); irb->RenderToTexture = GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 9d15582d78..c90c84b48c 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -28,9 +28,9 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H -#include "intel_screen.h" struct intel_context; +struct intel_region; /** * Intel framebuffer, derived from gl_framebuffer. @@ -70,6 +70,7 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; + void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ GLboolean RenderToTexture; /* RTT? */ @@ -79,9 +80,6 @@ struct intel_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ - - uint8_t *span_cache; - unsigned long span_cache_offset; }; extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 58c81766cd..f4566ba89c 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -30,8 +30,6 @@ #include <unistd.h> #include <errno.h> #include <sched.h> -#include <sys/types.h> -#include <sys/ioctl.h> #include "mtypes.h" #include "context.h" @@ -45,7 +43,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL @@ -106,7 +104,7 @@ intelWaitIrq(struct intel_context *intel, int seq) } -int +void intel_batch_ioctl(struct intel_context *intel, GLuint start_offset, GLuint used, @@ -115,7 +113,7 @@ intel_batch_ioctl(struct intel_context *intel, struct drm_i915_batchbuffer batch; if (intel->no_hw) - return 0; + return; assert(intel->locked); assert(used); @@ -144,42 +142,82 @@ intel_batch_ioctl(struct intel_context *intel, if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); - return -errno; + UNLOCK_HARDWARE(intel); + exit(1); } - - return 0; } -int +#ifdef TTM_API +void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf) + void *start, GLuint count, dri_fence **fence) { + struct drm_i915_execbuffer execbuf; + dri_fence *fo; int ret; assert(intel->locked); assert(used); if (intel->no_hw) - return 0; + return; + + if (*fence) { + dri_fence_unreference(*fence); + } - execbuf->batch_start_offset = 0; - execbuf->batch_len = used; - execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; - execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf->DR1 = 0; - execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); + memset(&execbuf, 0, sizeof(execbuf)); + + execbuf.num_buffers = count; + execbuf.batch.used = used; + execbuf.batch.cliprects = intel->pClipRects; + execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf.batch.DR1 = 0; + execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); + + execbuf.ops_list = (unsigned long)start; // TODO + execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; do { - ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf); + ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, + sizeof(execbuf)); } while (ret == -EAGAIN); if (ret != 0) { - fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); - return -errno; + fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); + UNLOCK_HARDWARE(intel); + exit(1); } - return 0; + if (execbuf.fence_arg.error != 0) { + + /* + * Fence creation has failed, but the GPU has been + * idled by the kernel. Safe to continue. + */ + + *fence = NULL; + return; + } + + fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", + &execbuf.fence_arg); + if (!fo) { + fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); + UNLOCK_HARDWARE(intel); + exit(1); + } + *fence = fo; +} +#else +void +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence) +{ } +#endif diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 526e38358c..8674aef723 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -33,14 +33,14 @@ void intelWaitIrq( struct intel_context *intel, int seq ); int intelEmitIrqLocked( struct intel_context *intel ); -int intel_batch_ioctl(struct intel_context *intel, - GLuint start_offset, +void intel_batch_ioctl( struct intel_context *intel, + GLuint start_offset, + GLuint used, + GLboolean ignore_cliprects, + GLboolean allow_unlock ); +void intel_exec_ioctl(struct intel_context *intel, GLuint used, - GLboolean ignore_cliprects, - GLboolean allow_unlock); -int intel_exec_ioctl(struct intel_context *intel, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf); + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 7e0d20e681..81238acfe4 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -43,7 +43,7 @@ #include "intel_buffer_objects.h" #include "intel_buffers.h" #include "intel_pixel.h" -#include "intel_reg.h" + #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -293,7 +293,7 @@ do_blit_bitmap( GLcontext *ctx, dst->pitch, dst->buffer, 0, - dst->tiling, + dst->tiled, rect.x1 + px, rect.y2 - (py + h), w, h, @@ -301,8 +301,9 @@ do_blit_bitmap( GLcontext *ctx, } } } + out: + intel_batchbuffer_flush(intel->batch); } -out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 3093ccf7c6..45f72bac52 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -229,7 +229,7 @@ do_texture_copypixels(GLcontext * ctx, out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -337,16 +337,18 @@ do_blit_copypixels(GLcontext * ctx, continue; intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiling, - dst->pitch, dst->buffer, 0, dst->tiling, + src->pitch, src->buffer, 0, src->tiled, + dst->pitch, dst->buffer, 0, dst->tiled, clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ clip_x, clip_y, /* dstx, dsty */ clip_w, clip_h, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } + + out: + intel_batchbuffer_flush(intel->batch); } -out: UNLOCK_HARDWARE(intel); DBG("%s: success\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 5675084da5..34813d2aa0 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -81,8 +81,7 @@ do_texture_drawpixels(GLcontext * ctx, else { /* PBO only for now: */ - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - not PBO\n", __FUNCTION__); +/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ return GL_FALSE; } @@ -181,7 +180,7 @@ do_texture_drawpixels(GLcontext * ctx, srcx, srcx + width, srcy + height, srcy); out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); return GL_TRUE; @@ -219,6 +218,7 @@ do_blit_drawpixels(GLcontext * ctx, struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; + dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -314,7 +314,7 @@ do_blit_drawpixels(GLcontext * ctx, intelEmitCopyBlit(intel, dest->cpp, rowLength, src_buffer, src_offset, GL_FALSE, - dest->pitch, dest->buffer, 0, dest->tiling, + dest->pitch, dest->buffer, 0, dest->tiled, rect.x1 - dest_rect.x1, rect.y2 - dest_rect.y2, rect.x1, @@ -322,9 +322,17 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } + intel_batchbuffer_flush(intel->batch); + fence = intel->batch->last_fence; + dri_fence_reference(fence); } UNLOCK_HARDWARE(intel); + if (fence) { + dri_fence_wait(fence); + dri_fence_unreference(fence); + } + if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 96af7e1a03..37629c07e2 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -31,140 +31,11 @@ #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) -#define MI_FLUSH (CMD_MI | (4 << 23)) -#define FLUSH_MAP_CACHE (1 << 0) -#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) - /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) #define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) -/* p189 */ -#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) -#define I1_LOAD_S(n) (1<<(4+n)) - -/** @{ - * 915 definitions - */ -#define S0_VB_OFFSET_MASK 0xffffffc -#define S0_AUTO_CACHE_INV_DISABLE (1<<0) -/** @} */ - -/** @{ - * 830 definitions - */ -#define S0_VB_OFFSET_MASK_830 0xffffff8 -#define S0_VB_PITCH_SHIFT_830 1 -#define S0_VB_ENABLE_830 0 -/** @} */ - -#define S1_VERTEX_WIDTH_SHIFT 24 -#define S1_VERTEX_WIDTH_MASK (0x3f<<24) -#define S1_VERTEX_PITCH_SHIFT 16 -#define S1_VERTEX_PITCH_MASK (0x3f<<16) - -#define TEXCOORDFMT_2D 0x0 -#define TEXCOORDFMT_3D 0x1 -#define TEXCOORDFMT_4D 0x2 -#define TEXCOORDFMT_1D 0x3 -#define TEXCOORDFMT_2D_16 0x4 -#define TEXCOORDFMT_4D_16 0x5 -#define TEXCOORDFMT_NOT_PRESENT 0xf -#define S2_TEXCOORD_FMT0_MASK 0xf -#define S2_TEXCOORD_FMT1_SHIFT 4 -#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) -#define S2_TEXCOORD_NONE (~0) -#define S2_TEX_COUNT_SHIFT_830 12 -#define S2_VERTEX_0_WIDTH_SHIFT_830 0 -#define S2_VERTEX_1_WIDTH_SHIFT_830 6 -/* S3 not interesting */ - -#define S4_POINT_WIDTH_SHIFT 23 -#define S4_POINT_WIDTH_MASK (0x1ff<<23) -#define S4_LINE_WIDTH_SHIFT 19 -#define S4_LINE_WIDTH_ONE (0x2<<19) -#define S4_LINE_WIDTH_MASK (0xf<<19) -#define S4_FLATSHADE_ALPHA (1<<18) -#define S4_FLATSHADE_FOG (1<<17) -#define S4_FLATSHADE_SPECULAR (1<<16) -#define S4_FLATSHADE_COLOR (1<<15) -#define S4_CULLMODE_BOTH (0<<13) -#define S4_CULLMODE_NONE (1<<13) -#define S4_CULLMODE_CW (2<<13) -#define S4_CULLMODE_CCW (3<<13) -#define S4_CULLMODE_MASK (3<<13) -#define S4_VFMT_POINT_WIDTH (1<<12) -#define S4_VFMT_SPEC_FOG (1<<11) -#define S4_VFMT_COLOR (1<<10) -#define S4_VFMT_DEPTH_OFFSET (1<<9) -#define S4_VFMT_XYZ (1<<6) -#define S4_VFMT_XYZW (2<<6) -#define S4_VFMT_XY (3<<6) -#define S4_VFMT_XYW (4<<6) -#define S4_VFMT_XYZW_MASK (7<<6) -#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) -#define S4_FORCE_DEFAULT_SPECULAR (1<<4) -#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) -#define S4_VFMT_FOG_PARAM (1<<2) -#define S4_SPRITE_POINT_ENABLE (1<<1) -#define S4_LINE_ANTIALIAS_ENABLE (1<<0) - -#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ - S4_VFMT_SPEC_FOG | \ - S4_VFMT_COLOR | \ - S4_VFMT_DEPTH_OFFSET | \ - S4_VFMT_XYZW_MASK | \ - S4_VFMT_FOG_PARAM) - - -#define S5_WRITEDISABLE_ALPHA (1<<31) -#define S5_WRITEDISABLE_RED (1<<30) -#define S5_WRITEDISABLE_GREEN (1<<29) -#define S5_WRITEDISABLE_BLUE (1<<28) -#define S5_WRITEDISABLE_MASK (0xf<<28) -#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) -#define S5_LAST_PIXEL_ENABLE (1<<26) -#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) -#define S5_FOG_ENABLE (1<<24) -#define S5_STENCIL_REF_SHIFT 16 -#define S5_STENCIL_REF_MASK (0xff<<16) -#define S5_STENCIL_TEST_FUNC_SHIFT 13 -#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) -#define S5_STENCIL_FAIL_SHIFT 10 -#define S5_STENCIL_FAIL_MASK (0x7<<10) -#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 -#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) -#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 -#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) -#define S5_STENCIL_WRITE_ENABLE (1<<3) -#define S5_STENCIL_TEST_ENABLE (1<<2) -#define S5_COLOR_DITHER_ENABLE (1<<1) -#define S5_LOGICOP_ENABLE (1<<0) - - -#define S6_ALPHA_TEST_ENABLE (1<<31) -#define S6_ALPHA_TEST_FUNC_SHIFT 28 -#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) -#define S6_ALPHA_REF_SHIFT 20 -#define S6_ALPHA_REF_MASK (0xff<<20) -#define S6_DEPTH_TEST_ENABLE (1<<19) -#define S6_DEPTH_TEST_FUNC_SHIFT 16 -#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) -#define S6_CBUF_BLEND_ENABLE (1<<15) -#define S6_CBUF_BLEND_FUNC_SHIFT 12 -#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) -#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 -#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) -#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 -#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) -#define S6_DEPTH_WRITE_ENABLE (1<<3) -#define S6_COLOR_WRITE_ENABLE (1<<2) -#define S6_TRISTRIP_PV_SHIFT 0 -#define S6_TRISTRIP_PV_MASK (0x3<<0) - -#define S7_DEPTH_OFFSET_CONST_MASK ~0 - /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index ddfdce3835..35ab46afea 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -39,17 +39,13 @@ * last moment. */ -#include <sys/ioctl.h> -#include <errno.h> - #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #include "intel_batchbuffer.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -80,34 +76,10 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } } -static int -intel_set_region_tiling_gem(struct intel_context *intel, - struct intel_region *region, - uint32_t bo_handle) -{ - struct drm_i915_gem_get_tiling get_tiling; - int ret; - - memset(&get_tiling, 0, sizeof(get_tiling)); - - get_tiling.handle = bo_handle; - ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) { - fprintf(stderr, "Failed to get tiling state for region: %s\n", - strerror(errno)); - return ret; - } - - region->tiling = get_tiling.tiling_mode; - region->bit_6_swizzle = get_tiling.swizzle_mode; - - return 0; -} - static struct intel_region * intel_region_alloc_internal(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - dri_bo *buffer) + GLuint tiled, dri_bo *buffer) { struct intel_region *region; @@ -121,12 +93,9 @@ intel_region_alloc_internal(struct intel_context *intel, region->pitch = pitch; region->height = height; /* needed? */ region->refcount = 1; + region->tiled = tiled; region->buffer = buffer; - /* Default to no tiling */ - region->tiling = I915_TILING_NONE; - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; - return region; } @@ -137,28 +106,25 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64); + pitch * cpp * height, 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); - return intel_region_alloc_internal(intel, cpp, pitch, height, buffer); + return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); } struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint handle) + GLuint tiled, GLuint handle) { - struct intel_region *region; dri_bo *buffer; - buffer = intel_bo_gem_create_from_name(intel->bufmgr, "dri2 region", handle); + buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); - region = intel_region_alloc_internal(intel, cpp, pitch, height, buffer); - if (region == NULL) - return region; - - intel_set_region_tiling_gem(intel, region, handle); - - return region; + return intel_region_alloc_internal(intel, + cpp, pitch, height, tiled, buffer); } void @@ -172,34 +138,26 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) } void -intel_region_release(struct intel_region **region_handle) +intel_region_release(struct intel_region **region) { - struct intel_region *region = *region_handle; - - if (region == NULL) + if (!*region) return; - DBG("%s %d\n", __FUNCTION__, region->refcount - 1); - - ASSERT(region->refcount > 0); - region->refcount--; + DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1); - if (region->refcount == 0) { - assert(region->map_refcount == 0); + ASSERT((*region)->refcount > 0); + (*region)->refcount--; - if (region->pbo) - region->pbo->region = NULL; - region->pbo = NULL; - dri_bo_unreference(region->buffer); - - if (region->classic_map != NULL) { - drmUnmap(region->classic_map, - region->pitch * region->cpp * region->height); - } + if ((*region)->refcount == 0) { + assert((*region)->map_refcount == 0); - free(region); + if ((*region)->pbo) + (*region)->pbo->region = NULL; + (*region)->pbo = NULL; + dri_bo_unreference((*region)->buffer); + free(*region); } - *region_handle = NULL; + *region = NULL; } /* @@ -314,8 +272,8 @@ intel_region_copy(struct intel_context *intel, intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, src_offset, src->tiling, - dst->pitch, dst->buffer, dst_offset, dst->tiling, + src->pitch, src->buffer, src_offset, src->tiled, + dst->pitch, dst->buffer, dst_offset, dst->tiled, srcx, srcy, dstx, dsty, width, height, GL_COPY); } @@ -345,7 +303,7 @@ intel_region_fill(struct intel_context *intel, intelEmitFillBlit(intel, dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiled, dstx, dsty, width, height, color); } @@ -397,7 +355,10 @@ intel_region_release_pbo(struct intel_context *intel, region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, - 64); + 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); } /* Break the COW tie to the pbo. Both the pbo and the region end up @@ -421,19 +382,23 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) /* Now blit from the texture buffer to the new buffer: */ + intel_batchbuffer_flush(intel->batch); + was_locked = intel->locked; - if (!was_locked) + if (intel->locked) LOCK_HARDWARE(intel); intelEmitCopyBlit(intel, region->cpp, - region->pitch, region->buffer, 0, region->tiling, - region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiled, + region->pitch, pbo->buffer, 0, region->tiled, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); - if (!was_locked) + intel_batchbuffer_flush(intel->batch); + + if (was_locked) UNLOCK_HARDWARE(intel); } @@ -458,7 +423,6 @@ intel_recreate_static(struct intel_context *intel, intelRegion *region_desc) { intelScreenPrivate *intelScreen = intel->intelScreen; - int ret; if (region == NULL) { region = calloc(sizeof(*region), 1); @@ -471,45 +435,21 @@ intel_recreate_static(struct intel_context *intel, region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; region->height = intelScreen->height; /* needed? */ + region->tiled = region_desc->tiled; if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, - name, - region_desc->bo_handle); - - intel_set_region_tiling_gem(intel, region, region_desc->bo_handle); + region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, + name, + region_desc->bo_handle); } else { - ret = drmMap(intel->driFd, region_desc->handle, - region->pitch * region->cpp * region->height, - ®ion->classic_map); - if (ret != 0) { - fprintf(stderr, "Failed to drmMap %s buffer\n", name); - free(region); - return NULL; - } - - region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, - name, - region_desc->offset, - region->pitch * region->cpp * - region->height, - region->classic_map); - - /* The sarea just gives us a boolean for whether it's tiled or not, - * instead of which tiling mode it is. Guess. - */ - if (region_desc->tiled) { - if (IS_965(intel->intelScreen->deviceID) && - region_desc == &intelScreen->depth) - region->tiling = I915_TILING_Y; - else - region->tiling = I915_TILING_X; - } else { - region->tiling = I915_TILING_NONE; - } - - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + region->buffer = dri_bo_alloc_static(intel->bufmgr, + name, + region_desc->offset, + intelScreen->pitch * + intelScreen->height, + region_desc->map, + DRM_BO_FLAG_MEM_TT); } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index e5f19fbb45..229f79aeba 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -28,12 +28,6 @@ #ifndef INTEL_REGIONS_H #define INTEL_REGIONS_H -/** @file intel_regions.h - * - * Structure definitions and prototypes for intel_region handling, which is - * the basic structure for rectangular collections of pixels stored in a dri_bo. - */ - #include "mtypes.h" #include "dri_bufmgr.h" @@ -59,9 +53,8 @@ struct intel_region GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ - uint32_t tiling; /**< Which tiling mode the region is in */ - uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ - drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ + GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */ + struct intel_buffer_object *pbo; /* zero-copy uploads */ }; @@ -76,7 +69,7 @@ struct intel_region *intel_region_alloc(struct intel_context *intel, struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - unsigned int handle); + GLuint tiled, unsigned int handle); void intel_region_reference(struct intel_region **dst, struct intel_region *src); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 9ed89906d5..5dded4b167 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] = /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL */ - DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") DRI_CONF_ENUM(0, "Disable buffer object reuse") DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") @@ -90,6 +90,51 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) { intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + if (intelScreen->front.handle) { + if (drmMap(sPriv->fd, + intelScreen->front.handle, + intelScreen->front.size, + (drmAddress *) & intelScreen->front.map) != 0) { + _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); + return GL_FALSE; + } + } + else { + _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); + } + + if (0) + _mesa_printf("Back 0x%08x ", intelScreen->back.handle); + if (drmMap(sPriv->fd, + intelScreen->back.handle, + intelScreen->back.size, + (drmAddress *) & intelScreen->back.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (intelScreen->third.handle) { + if (0) + _mesa_printf("Third 0x%08x ", intelScreen->third.handle); + if (drmMap(sPriv->fd, + intelScreen->third.handle, + intelScreen->third.size, + (drmAddress *) & intelScreen->third.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + } + + if (0) + _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle); + if (drmMap(sPriv->fd, + intelScreen->depth.handle, + intelScreen->depth.size, + (drmAddress *) & intelScreen->depth.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + if (0) _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); if (intelScreen->tex.size != 0) { @@ -102,15 +147,50 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) } } + if (0) + printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n", + intelScreen->front.map, + intelScreen->back.map, intelScreen->third.map, + intelScreen->depth.map, intelScreen->tex.map); return GL_TRUE; } void intelUnmapScreenRegions(intelScreenPrivate * intelScreen) { +#define REALLY_UNMAP 1 + if (intelScreen->front.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) + printf("drmUnmap front failed!\n"); +#endif + intelScreen->front.map = NULL; + } + if (intelScreen->back.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) + printf("drmUnmap back failed!\n"); +#endif + intelScreen->back.map = NULL; + } + if (intelScreen->third.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0) + printf("drmUnmap third failed!\n"); +#endif + intelScreen->third.map = NULL; + } + if (intelScreen->depth.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->depth.map, intelScreen->depth.size); + intelScreen->depth.map = NULL; +#endif + } if (intelScreen->tex.map) { +#if REALLY_UNMAP drmUnmap(intelScreen->tex.map, intelScreen->tex.size); intelScreen->tex.map = NULL; +#endif } } @@ -141,16 +221,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea) sarea->height); fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle, sarea->front_tiled); + (unsigned) sarea->front_handle); fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle, sarea->back_tiled); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + (unsigned) sarea->back_handle); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle, sarea->depth_tiled); + (unsigned) sarea->depth_handle); fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); } @@ -254,6 +334,8 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, * attached. */ } +#define BUFFER_FLAG_TILED 0x0100 + /** * DRI2 entrypoint */ @@ -266,6 +348,7 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, struct intel_renderbuffer *rb; struct intel_region *region; struct intel_context *intel = pcp->driverPrivate; + GLuint tiled; switch (ba->buffer.attachment) { case DRI_DRAWABLE_BUFFER_FRONT_LEFT: @@ -299,9 +382,10 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, return; #endif + tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0; region = intel_region_alloc_for_handle(intel, ba->buffer.cpp, ba->buffer.pitch / ba->buffer.cpp, - dPriv->h, + dPriv->h, tiled, ba->buffer.handle); intel_renderbuffer_set_region(rb, region); @@ -446,13 +530,14 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); /* setup the hardware-based renderbuffers */ - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, - &intel_fb->color_rb[0]->Base); + { + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, + &intel_fb->color_rb[0]->Base); + } if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); - + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); @@ -484,7 +569,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 9a73b13951..e62b2d7c89 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -74,8 +74,6 @@ typedef struct int irq_active; int allow_batchbuffer; - int ttm; - /** * Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index edede3a74b..742b1b8735 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,224 +39,6 @@ #include "swrast/swrast.h" -static void -intel_set_span_functions(struct intel_context *intel, - struct gl_renderbuffer *rb); - -#define SPAN_CACHE_SIZE 4096 - -static void -get_span_cache(struct intel_renderbuffer *irb, uint32_t offset) -{ - if (irb->span_cache == NULL) { - irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); - irb->span_cache_offset = -1; - } - - if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) { - irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1); - dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset, - SPAN_CACHE_SIZE, irb->span_cache); - } -} - -static void -clear_span_cache(struct intel_renderbuffer *irb) -{ - irb->span_cache_offset = -1; -} - -static uint32_t -pread_32(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint32_t -pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) | - 0xff000000; -} - -static uint16_t -pread_16(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint8_t -pread_8(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static void -pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 4, &val); -} - -static void -pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 3, &val); -} - -static void -pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 2, &val); -} - -static void -pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 1, &val); -} - -static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - x += intel->drawX; - y += intel->drawY; - - return (y * irb->region->pitch + x) * irb->region->cpp; -} - -/* - * Deal with tiled surfaces - */ - -static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - tile_stride = (irb->pfPitch * irb->region->cpp) << 3; - - x += intel->drawX; - y += intel->drawY; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x1ff; - y_tile_off = y & 7; - - x_tile_number = xbyte >> 9; - y_tile_number = y >> 3; - - tile_off = (y_tile_off << 9) + x_tile_off; - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - -#if 0 - printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", - x, y, tile_off, tile_base, - tile_off + tile_base, - irb->pfPitch, tile_stride); -#endif - - return tile_base + tile_off; -} - -static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - tile_stride = (irb->pfPitch * irb->region->cpp) << 5; - - x += intel->drawX; - y += intel->drawY; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x7f; - y_tile_off = y & 0x1f; - - x_tile_number = xbyte >> 7; - y_tile_number = y >> 5; - - tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + - (x_tile_off & 0xf); - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - - return tile_base + tile_off; -} - /* break intelWriteRGBASpan_ARGB8888 */ @@ -269,7 +51,10 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + GLubyte *buf = (GLubyte *) irb->pfMap \ + + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ + assert(irb->pfMap);\ (void) p; /* XXX FBO: this is identical to the macro in spantmp2.h except we get @@ -284,14 +69,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ int maxy = intel->pClipRects[_nc].y2 - intel->drawY; - -#if 0 - }} -#endif + + + #define Y_FLIP(_y) ((_y) * yScale + yBias) -/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -303,8 +86,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_RGB565 #define TAG2(x,y) intel##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V) +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -314,89 +96,17 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_ARGB8888 #define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit, xRGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_xRGB8888 -#define TAG2(x,y) intel##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 16 bit RGB565 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_XTile_##x##_RGB565 -#define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_YTile_##x##_RGB565 -#define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit ARGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_ARGB8888 -#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_ARGB8888 -#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit xRGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_xRGB8888 -#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_xRGB8888 -#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V) +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ + (intel->drawY * pitch + intel->drawX) * irb->region->cpp; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -405,34 +115,15 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, ** 16-bit depthbuffer functions. **/ #define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" +#define WRITE_DEPTH( _x, _y, d ) \ + ((GLushort *)buf)[(_x) + (_y) * pitch] = d; -/** - ** 16-bit x tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel_XTile_##x##_z16 -#include "depthtmp.h" +#define READ_DEPTH( d, _x, _y ) \ + d = ((GLushort *)buf)[(_x) + (_y) * pitch]; -/** - ** 16-bit y tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel_YTile_##x##_z16 + +#define TAG(x) intel##x##_z16 #include "depthtmp.h" @@ -445,129 +136,38 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define VALUE_TYPE GLuint /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \ } -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - - -/** - ** 24/8-bit x-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) \ - /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ d = (tmp << 8) | (tmp >> 24); \ } -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "depthtmp.h" - -/** - ** 24/8-bit y-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_YTile_##x##_z24_s8 +#define TAG(x) intel##x##_z24_s8 #include "depthtmp.h" /** ** 8-bit stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d) +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \ +} -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3); +#define READ_STENCIL( d, _x, _y ) \ + d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24; #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" -/** - ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3); - -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "stenciltmp.h" - -/** - ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d) - -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3) - -#define TAG(x) intel_YTile_##x##_z24_s8 -#include "stenciltmp.h" - -void -intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - if (irb == NULL || irb->region == NULL) - return; - - irb->pfPitch = irb->region->pitch; - - intel_set_span_functions(intel, rb); -} - -void -intel_renderbuffer_unmap(struct intel_context *intel, - struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - if (irb == NULL || irb->region == NULL) - return; - - clear_span_cache(irb); - irb->pfPitch = 0; - - rb->GetRow = NULL; - rb->PutRow = NULL; -} /** * Map or unmap all the renderbuffers which we may need during @@ -586,13 +186,23 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) { GLcontext *ctx = &intel->ctx; GLuint i, j; + struct intel_renderbuffer *irb; /* color draw buffers */ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { - if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); - else - intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; + irb = intel_renderbuffer(rb); + if (irb) { + /* this is a user-created intel_renderbuffer */ + if (irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } /* check for render to textures */ @@ -615,28 +225,77 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } /* color read buffers */ - if (map) - intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer); - else - intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); + irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + if (irb && irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + + /* Account for front/back color page flipping. + * The span routines use the pfMap and pfPitch fields which will + * swap the front/back region map/pitch if we're page flipped. + * Do this after mapping, above, so the map field is valid. + */ +#if 0 + if (map && ctx->DrawBuffer->Name == 0) { + struct intel_renderbuffer *irbFront + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT); + struct intel_renderbuffer *irbBack + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT); + if (irbBack) { + /* double buffered */ + if (intel->sarea->pf_current_page == 0) { + irbFront->pfMap = irbFront->region->map; + irbFront->pfPitch = irbFront->region->pitch; + irbBack->pfMap = irbBack->region->map; + irbBack->pfPitch = irbBack->region->pitch; + } + else { + irbFront->pfMap = irbBack->region->map; + irbFront->pfPitch = irbBack->region->pitch; + irbBack->pfMap = irbFront->region->map; + irbBack->pfPitch = irbFront->region->pitch; + } + } + } +#endif /* depth buffer (Note wrapper!) */ if (ctx->DrawBuffer->_DepthBuffer) { - if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_DepthBuffer->Wrapped); + irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } /* stencil buffer (Note wrapper!) */ if (ctx->DrawBuffer->_StencilBuffer) { - if (map) - intel_renderbuffer_map(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } } @@ -654,9 +313,18 @@ intelSpanRenderStart(GLcontext * ctx) struct intel_context *intel = intel_context(ctx); GLuint i; - intelFlush(&intel->ctx); + intelFinish(&intel->ctx); LOCK_HARDWARE(intel); +#if 0 + /* Just map the framebuffer and all textures. Bufmgr code will + * take care of waiting on the necessary fences: + */ + intel_region_map(intel, intel->front_region); + intel_region_map(intel, intel->back_region); + intel_region_map(intel, intel->depth_region); +#endif + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -679,6 +347,14 @@ intelSpanRenderFinish(GLcontext * ctx) _swrast_flush(ctx); + /* Now unmap the framebuffer: + */ +#if 0 + intel_region_unmap(intel, intel->front_region); + intel_region_unmap(intel, intel->back_region); + intel_region_unmap(intel, intel->depth_region); +#endif + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -705,108 +381,26 @@ intelInitSpanFuncs(GLcontext * ctx) * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. */ -static void -intel_set_span_functions(struct intel_context *intel, - struct gl_renderbuffer *rb) +void +intel_set_span_functions(struct gl_renderbuffer *rb) { - struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - uint32_t tiling; - - /* If in GEM mode, we need to do the tile address swizzling ourselves, - * instead of the fence registers handling it. - */ - if (intel->ttm) - tiling = irb->region->tiling; - else - tiling = I915_TILING_NONE; - if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_RGB565(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_RGB565(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_RGB565(rb); - break; - } - } - else if (rb->_ActualFormat == GL_RGB8) { - /* 8888 RGBx */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_xRGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); - break; - } + intelInitPointers_RGB565(rb); } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); - break; - } + intelInitPointers_ARGB8888(rb); } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z16(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z16(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z16(rb); - break; - } + intelInitDepthPointers_z16(rb); } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); - break; - } + intelInitDepthPointers_z24_s8(rb); } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; - } + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ + intelInitStencilPointers_z24_s8(rb); } else { _mesa_problem(NULL, diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index acbeb4abe1..5201f6d6c6 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -32,9 +32,7 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -void intel_renderbuffer_map(struct intel_context *intel, - struct gl_renderbuffer *rb); -void intel_renderbuffer_unmap(struct intel_context *intel, - struct gl_renderbuffer *rb); + +extern void intel_set_span_functions(struct gl_renderbuffer *rb); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index d0ab464a1c..1add7c6188 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -144,13 +144,15 @@ do_copy_texsubimage(struct intel_context *intel, -src->pitch, src->buffer, src->height * src->pitch * src->cpp, - src->tiling, + GL_FALSE, intelImage->mt->pitch, intelImage->mt->region->buffer, image_offset, - intelImage->mt->region->tiling, + intelImage->mt->region->tiled, x, y + height, dstx, dsty, width, height, GL_COPY); /* ? */ + + intel_batchbuffer_flush(intel->batch); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index c11687a2cf..f261034c18 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -238,6 +238,8 @@ try_pbo_upload(struct intel_context *intel, dst_stride, dst_buffer, dst_offset, GL_FALSE, 0, 0, 0, 0, width, height, GL_COPY); + + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -398,26 +400,11 @@ intelTexImage(GLcontext * ctx, intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); - } else if (intelImage->base.Border == 0) { - int comp_byte = 0; - - if (intelImage->base.IsCompressed) { - comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); - } - - /* Didn't fit in the object miptree, but it's suitable for inclusion in - * a miptree, so create one just for our level and store it in the image. - * It'll get moved into the object miptree at validate time. - */ - intelImage->mt = intel_miptree_create(intel, target, internalFormat, - level, level, - width, height, depth, - intelImage->base.TexFormat->TexelBytes, - comp_byte); - } + if (!intelImage->mt) + DBG("XXX: Image did not fit into tree - storing in local memory!\n"); + /* PBO fastpaths: */ if (dims <= 2 && diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 5763f4ae1f..1b3aa89c05 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -125,10 +125,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) struct intel_texture_object *intelObj = intel_texture_object(tObj); int comp_byte = 0; int cpp; + GLuint face, i; GLuint nr_faces = 0; struct intel_texture_image *firstImage; + GLboolean need_flush = GL_FALSE; + /* We know/require this is true by now: */ assert(intelObj->base._Complete); @@ -224,10 +227,21 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ if (intelObj->mt != intelImage->mt) { copy_image_data_to_tree(intel, intelObj, intelImage); + need_flush = GL_TRUE; } } } +#ifdef I915 + /* XXX: what is this flush about? + * On 965, it causes a batch flush in the middle of the state relocation + * emits, which means that the eventual rendering doesn't have all of the + * required relocations in place. + */ + if (need_flush) + intel_batchbuffer_flush(intel->batch); +#endif + return GL_TRUE; } |