diff options
author | Eric Anholt <eric@anholt.net> | 2007-12-07 16:15:49 -0800 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2007-12-07 16:19:10 -0800 |
commit | e3a6e60040b7f6ea7965e52f8f9881ed31e0347c (patch) | |
tree | 6fadf576b904bb479936a355ed716ca305556848 /src | |
parent | 3ecdae82d751f9f404d10332f030e3280949ce4e (diff) |
[965] Convert the driver to dri_bufmgr interface and enable TTM.
This is currently believed to work, but to be a significant performance loss.
Performance recovery should follow soon.
The dri_bo_fake_disable_backing_store() call was added to allow disabling the
backing store, as bufmgr_fake.c did, which is a significant performance win
(though it's missing the no-fence-subdata part).
This commit is a squash merge of the 965-ttm branch, which had some history
I wanted to avoid pulling because it was noisy and broken at many points,
which would have hampered git-bisecting.
Diffstat (limited to 'src')
40 files changed, 874 insertions, 2465 deletions
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index b3a170496e..d263ad279b 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -203,6 +203,10 @@ dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, unsigned int cookie), void *driver_priv); void dri_bufmgr_fake_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr); void dri_bufmgr_destroy(dri_bufmgr *bufmgr); dri_bo *dri_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, unsigned int handle); diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index dda6e5a25e..b9c6bba22a 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -181,7 +181,7 @@ typedef struct _dri_bo_fake { struct block *block; void *backing_store; - void (*invalidate_cb)(dri_bufmgr *bufmgr, void * ); + void (*invalidate_cb)(dri_bo *bo, void *ptr); void *invalidate_ptr; } dri_bo_fake; @@ -318,9 +318,9 @@ static void free_backing_store(dri_bo *bo) { dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); if (bo_fake->backing_store) { + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); ALIGN_FREE(bo_fake->backing_store); bo_fake->backing_store = NULL; } @@ -332,8 +332,8 @@ set_dirty(dri_bo *bo) dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - if (bo_fake->flags & BM_NO_BACKING_STORE) - bo_fake->invalidate_cb(&bufmgr_fake->bufmgr, bo_fake->invalidate_ptr); + if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) + bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); assert(!(bo_fake->flags & BM_PINNED)); @@ -678,6 +678,40 @@ dri_fake_bo_unreference(dri_bo 
*bo) } /** + * Set the buffer as not requiring backing store, and instead get the callback + * invoked whenever it would be set dirty. + */ +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + _glthread_LOCK_MUTEX(bufmgr_fake->mutex); + + if (bo_fake->backing_store) + free_backing_store(bo); + + bo_fake->flags |= BM_NO_BACKING_STORE; + + DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); + bo_fake->dirty = 1; + bo_fake->invalidate_cb = invalidate_cb; + bo_fake->invalidate_ptr = ptr; + + /* Note that it is invalid right from the start. Also note + * invalidate_cb is called with the bufmgr locked, so cannot + * itself make bufmgr calls. + */ + if (invalidate_cb != NULL) + invalidate_cb(bo, ptr); + + _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex); +} + +/** * Map a buffer into bo->virtual, allocating either card memory space (If * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. 
*/ diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index 16eb6afed2..993a08ef95 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -135,6 +135,7 @@ struct intel_context void (*assert_not_dirty) (struct intel_context *intel); + void (*debug_batch)(struct intel_context *intel); } vtbl; GLint refcount; diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 48ecadfd89..5b1a83bccc 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -5,11 +5,11 @@ include $(TOP)/configs/current LIBNAME = i965_dri.so DRIVER_SOURCES = \ - bufmgr_fake.c \ intel_batchbuffer.c \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ + intel_bufmgr_ttm.c \ intel_context.c \ intel_decode.c \ intel_ioctl.c \ @@ -53,6 +53,7 @@ DRIVER_SOURCES = \ brw_sf_state.c \ brw_state_batch.c \ brw_state_cache.c \ + brw_state_dump.c \ brw_state_pool.c \ brw_state_upload.c \ brw_tex.c \ @@ -80,6 +81,7 @@ DRIVER_SOURCES = \ C_SOURCES = \ $(COMMON_SOURCES) \ + $(COMMON_BM_SOURCES) \ $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3c227faba6..68afea111d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -242,7 +242,7 @@ struct brw_surface_binding_table { struct brw_cache; struct brw_mem_pool { - struct buffer *buffer; + dri_bo *buffer; GLuint size; GLuint offset; /* offset of first free byte */ @@ -310,6 +310,8 @@ struct brw_state_pointers { struct brw_tracked_state { struct brw_state_flags dirty; void (*update)( struct brw_context *brw ); + void (*emit_reloc)( struct brw_context *brw ); + GLboolean always_update; }; @@ -596,16 +598,17 @@ struct brw_context GLuint input_size_masks[4]; - /* State structs + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache 
*/ - struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT]; struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; GLuint render_surf; GLuint nr_surfaces; GLuint max_threads; - struct buffer *scratch_buffer; + dri_bo *scratch_buffer; GLuint scratch_buffer_size; GLuint sampler_count; @@ -659,6 +662,10 @@ void brw_init_state( struct brw_context *brw ); void brw_destroy_state( struct brw_context *brw ); +/*====================================================================== + * brw_state_dump.c + */ +void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 2aeb6fabc7..4007dbf9e9 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -312,11 +312,7 @@ static void upload_constant_buffer(struct brw_context *brw) /* Copy data to the buffer: */ - bmBufferSubData(&brw->intel, - pool->buffer, - brw->curbe.gs_offset, - bufsz, - buf); + dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf); } /* Because this provokes an action (ie copy the constants into the diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 969be594af..87e2202029 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -289,7 +289,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, LOCK_HARDWARE(intel); if (brw->intel.numClipRects == 0) { - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + assert(intel->batch->ptr == intel->batch->map); UNLOCK_HARDWARE(intel); return GL_TRUE; } @@ -358,14 +358,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * way around this, as not every flush is due to a buffer filling * up. 
*/ - if (!intel_batchbuffer_flush( brw->intel.batch )) { - DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__); - retval = GL_FALSE; - } - - if (retval && intel->thrashing) { - bmSetFence(intel); - } + intel_batchbuffer_flush( brw->intel.batch ); /* Free any old data so it doesn't clog up texture memory - we * won't be referencing it again. @@ -425,7 +418,6 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - struct intel_context *intel = intel_context(ctx); GLboolean retval; /* Decide if we want to rebase. If so we end up recursing once @@ -445,20 +437,6 @@ void brw_draw_prims( GLcontext *ctx, */ retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - - /* This looks like out-of-memory but potentially we have - * situation where there is enough memory but it has become - * fragmented. Clear out all heaps and start from scratch by - * faking a contended lock event: (done elsewhere) - */ - if (!retval && !intel->Fallback && bmError(intel)) { - DBG("retrying\n"); - /* Then try a second time only to upload textures and draw the - * primitives: - */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - } - /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. @@ -469,13 +447,6 @@ void brw_draw_prims( GLcontext *ctx, } } - -static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr ) -{ - /* nothing to do, we don't rely on the contents being preserved */ -} - - void brw_draw_init( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; @@ -490,22 +461,25 @@ void brw_draw_init( struct brw_context *brw ) for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) { brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); - - /* NOTE: These are set to no-backing-store. 
+ + ctx->Driver.BufferData(ctx, + GL_ARRAY_BUFFER_ARB, + BRW_UPLOAD_INIT_SIZE, + NULL, + GL_DYNAMIC_DRAW_ARB, + brw->vb.upload.vbo[i]); + + /* Set the internal VBOs to no-backing-store. We only use them as a + * temporary within a brw_try_draw_prims while the lock is held. */ - bmBufferSetInvalidateCB(&brw->intel, - intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])), - brw_invalidate_vbo_cb, - &brw->intel, - GL_TRUE); - } + if (!brw->intel.intelScreen->ttm) { + struct intel_buffer_object *intel_bo = + intel_buffer_object(brw->vb.upload.vbo[i]); - ctx->Driver.BufferData( ctx, - GL_ARRAY_BUFFER_ARB, - BRW_UPLOAD_INIT_SIZE, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[0] ); + dri_bo_fake_disable_backing_store(intel_bufferobj_buffer(intel_bo), + NULL, NULL); + } + } } void brw_draw_destroy( struct brw_context *brw ) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b7795703fd..c0da290d5c 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -58,7 +58,7 @@ struct brw_array_state { GLuint dword; } vb0; - struct buffer *buffer; + dri_bo *buffer; GLuint offset; GLuint max_index; @@ -68,7 +68,7 @@ struct brw_array_state { }; -static struct buffer *array_buffer( const struct gl_client_array *array ) +static dri_bo *array_buffer( const struct gl_client_array *array ) { return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj)); } @@ -621,7 +621,7 @@ void brw_upload_indices( struct brw_context *brw, */ { struct brw_indexbuffer ib; - struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); + dri_bo *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); memset(&ib, 0, sizeof(ib)); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index d5779680ff..210745c63b 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ 
b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -123,21 +123,18 @@ const struct brw_tracked_state brw_drawing_rect = { .update = upload_drawing_rect }; -/*********************************************************************** - * Binding table pointers +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. */ - static void upload_binding_table_pointers(struct brw_context *brw) { struct brw_binding_table_pointers btp; memset(&btp, 0, sizeof(btp)); - /* The binding table has been emitted to the SS pool already, so we - * know what its offset is. When the batch buffer is fired, the - * binding table and surface structs will get fixed up to point to - * where the textures actually landed, but that won't change the - * value of the offsets here: - */ btp.header.opcode = CMD_BINDING_TABLE_PTRS; btp.header.length = sizeof(btp)/4 - 2; btp.vs = 0; @@ -159,11 +156,12 @@ const struct brw_tracked_state brw_binding_table_pointers = { }; -/*********************************************************************** - * Pipelined state pointers. This is the key state packet from which - * the hardware chases pointers to all the uploaded state in VRAM. +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. */ - static void upload_pipelined_state_pointers(struct brw_context *brw ) { struct brw_pipelined_state_pointers psp; @@ -233,71 +231,53 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .update = upload_psp_urb_cbs }; - - - -/*********************************************************************** - * Depthbuffer - currently constant, but rotation would change that. +/** + * Upload the depthbuffer offset and format. 
+ * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. */ - static void upload_depthbuffer(struct brw_context *brw) { - /* 0x79050003 Depth Buffer */ struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - struct brw_depthbuffer bd; - memset(&bd, 0, sizeof(bd)); - bd.header.bits.opcode = CMD_DEPTH_BUFFER; - bd.header.bits.length = sizeof(bd)/4-2; - bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1; - + unsigned int format; + switch (region->cpp) { case 2: - bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM; + format = BRW_DEPTHFORMAT_D16_UNORM; break; case 4: if (intel->depth_buffer_is_float) - bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT; + format = BRW_DEPTHFORMAT_D32_FLOAT; else - bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; break; default: assert(0); return; } - bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */ - - /* The depthbuffer can only use YMAJOR tiling... This is a bit of - * a shame as it clashes with the 2d blitter which only supports - * XMAJOR tiling... - */ - bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR; - bd.dword1.bits.tiled_surface = intel->depth_region->tiled; - bd.dword1.bits.surface_type = BRW_SURFACE_2D; - - /* BRW_NEW_LOCK */ - bd.dword2_base_addr = bmBufferOffset(intel, region->buffer); - - bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - bd.dword3.bits.lod = 0; - bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? 
*/ - bd.dword3.bits.height = region->height - 1; - - bd.dword4.bits.min_array_element = 0; - bd.dword4.bits.depth = 0; - - BRW_CACHED_BATCH_STRUCT(brw, &bd); + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + OUT_BATCH(((region->pitch * region->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + (region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((region->pitch - 1) << 6) | + ((region->height - 1) << 19)); + OUT_BATCH(0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 - }, - .update = upload_depthbuffer + .update = upload_depthbuffer, + .always_update = GL_TRUE, }; @@ -494,40 +474,37 @@ const struct brw_tracked_state brw_invarient_state = { .update = upload_invarient_state }; - -/* State pool addresses: +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. 
*/ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - struct brw_state_base_address sba; - - memset(&sba, 0, sizeof(sba)); - - sba.header.opcode = CMD_STATE_BASE_ADDRESS; - sba.header.length = 0x4; - - /* BRW_NEW_LOCK */ - sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5; - sba.bits0.modify_enable = 1; - - /* BRW_NEW_LOCK */ - sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5; - sba.bits1.modify_enable = 1; - sba.bits2.modify_enable = 1; - sba.bits3.modify_enable = 1; - sba.bits4.modify_enable = 1; - - BRW_CACHED_BATCH_STRUCT(brw, &sba); + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 1); /* General state base address */ + OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); } const struct brw_tracked_state brw_state_base_address = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 - }, + .always_update = GL_TRUE, .update = upload_state_base_address }; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b4cbdd7a38..ef2409df5a 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -107,6 +107,12 @@ GLboolean brw_search_cache( struct brw_cache *cache, void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +static inline dri_bo *brw_cache_buffer(struct brw_context *brw, + enum 
brw_cache_id id) +{ + return brw->cache[id].pool->buffer; +} + /*********************************************************************** * brw_state_batch.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index b78b51328a..eabda257d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -95,8 +95,6 @@ static void clear_batch_cache( struct brw_context *brw ) brw_clear_all_caches(brw); - bmReleaseBuffers(&brw->intel); - brw_invalidate_pools(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 0e73ff8390..618e445546 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -187,12 +187,7 @@ GLuint brw_upload_cache( struct brw_cache *cache, /* Copy data to the buffer: */ - bmBufferSubData(&cache->brw->intel, - cache->pool->buffer, - offset, - data_size, - data); - + dri_bo_subdata(cache->pool->buffer, offset, data_size, data); cache->brw->state.dirty.cache |= 1<<cache->id; cache->last_addr = offset; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c new file mode 100644 index 0000000000..1e8fc97275 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -0,0 +1,131 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "mtypes.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * Prints out a header, the contents, and the message associated with + * the hardware state data given. + * + * \param name Name of the state object + * \param data Pointer to the base of the state object + * \param hw_offset Hardware offset of the base of the state data. + * \param index Index of the DWORD being output. + */ +static void +state_out(char *name, uint32_t *data, uint32_t hw_offset, int index, + char *fmt, ...) 
+{ + va_list va; + + fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", + name, hw_offset + index * 4, data[index]); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + +/** Generic, undecoded state buffer debug printout */ +static void +state_struct_out(char *name, dri_bo *buffer, unsigned int pool_offset, + unsigned int state_size) +{ + int i; + uint32_t *state; + + state = buffer->virtual + pool_offset; + for (i = 0; i < state_size / 4; i++) { + state_out(name, state, buffer->offset + pool_offset, i, + "dword %d\n", i); + } +} + +static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer) +{ + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) { + unsigned int surfoff = ss_buffer->offset + brw->wm.bind.surf_ss_offset[i]; + struct brw_surface_state *surf = + (struct brw_surface_state *)(ss_buffer->virtual + + brw->wm.bind.surf_ss_offset[i]); + uint32_t *surfvals = (uint32_t *)surf; + char name[20]; + + sprintf(name, "WM SS%d", i); + state_out(name, surfvals, surfoff, 0, "\n"); + state_out(name, surfvals, surfoff, 1, "offset\n"); + state_out(name, surfvals, surfoff, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); + state_out(name, surfvals, surfoff, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); + state_out(name, surfvals, surfoff, 4, "mip base %d\n", + surf->ss4.min_lod); + } +} + +/** + * Print additional debug information associated with the batchbuffer + * when DEBUG_BATCH is set. + * + * For 965, this means mapping the state buffers that would have been referenced + * by the batchbuffer and dumping them. + * + * The buffer offsets printed rely on the buffer containing the last offset + * it was validated at. 
+ */ +void brw_debug_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + dri_bo *ss_buffer, *gs_buffer; + + ss_buffer = brw->pool[BRW_SS_POOL].buffer; + gs_buffer = brw->pool[BRW_GS_POOL].buffer; + + dri_bo_map(ss_buffer, GL_FALSE); + dri_bo_map(gs_buffer, GL_FALSE); + + state_struct_out("WM bind", ss_buffer, brw->wm.bind_ss_offset, + 4 * brw->wm.nr_surfaces); + dump_wm_surface_state(brw, ss_buffer); + + state_struct_out("VS", gs_buffer, brw->vs.state_gs_offset, + sizeof(struct brw_vs_unit_state)); + state_struct_out("SF", gs_buffer, brw->sf.state_gs_offset, + sizeof(struct brw_sf_unit_state)); + state_struct_out("SF viewport", gs_buffer, brw->sf.state_gs_offset, + sizeof(struct brw_sf_unit_state)); + state_struct_out("WM", gs_buffer, brw->wm.state_gs_offset, + sizeof(struct brw_wm_unit_state)); + + dri_bo_unmap(gs_buffer); + dri_bo_unmap(ss_buffer); +} diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c index eda92a2fa8..0fc3a1a871 100644 --- a/src/mesa/drivers/dri/i965/brw_state_pool.c +++ b/src/mesa/drivers/dri/i965/brw_state_pool.c @@ -34,7 +34,7 @@ #include "imports.h" #include "intel_ioctl.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" GLboolean brw_pool_alloc( struct brw_mem_pool *pool, GLuint size, @@ -64,28 +64,21 @@ void brw_invalidate_pool( struct intel_context *intel, { if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - bmBufferData(intel, - pool->buffer, - pool->size, - NULL, - 0); pool->offset = 0; brw_clear_all_caches(pool->brw); } -static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr ) +static void +brw_invalidate_pool_cb(dri_bo *bo, void *ptr) { - struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr; + struct brw_mem_pool *pool = ptr; + struct brw_context *brw = pool->brw; - pool->offset = 0; - brw_clear_all_caches(pool->brw); + brw_invalidate_pool(&brw->intel, pool); } - - static void brw_init_pool( 
struct brw_context *brw, GLuint pool_id, GLuint size ) @@ -94,30 +87,28 @@ static void brw_init_pool( struct brw_context *brw, pool->size = size; pool->brw = brw; - - bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12); - /* Also want to say not to wait on fences when data is presented - */ - bmBufferSetInvalidateCB(&brw->intel, pool->buffer, - brw_invalidate_pool_cb, - pool, - GL_TRUE); - - bmBufferData(&brw->intel, - pool->buffer, - pool->size, - NULL, - 0); + pool->buffer = dri_bo_alloc(brw->intel.intelScreen->bufmgr, + (pool_id == BRW_GS_POOL) ? "GS pool" : "SS pool", + size, 4096, DRM_BO_FLAG_MEM_TT); + /* Disable the backing store for the state cache. It's not worth the + * cost of keeping a backing store copy, since we can just regenerate + * the contents at approximately the same cost as the memcpy, and only + * if the contents are lost. + */ + if (!brw->intel.intelScreen->ttm) { + dri_bo_fake_disable_backing_store(pool->buffer, brw_invalidate_pool_cb, + pool); + } } static void brw_destroy_pool( struct brw_context *brw, GLuint pool_id ) { struct brw_mem_pool *pool = &brw->pool[pool_id]; - - bmDeleteBuffers(&brw->intel, 1, &pool->buffer); + + dri_bo_unreference(pool->buffer); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 92c07c2962..98637a6097 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" #include "intel_batchbuffer.h" /* This is used to initialize brw->state.atoms[]. We could use this @@ -210,14 +210,6 @@ void brw_validate_state( struct brw_context *brw ) if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache_flush(brw); - - /* Make an early reference to the state pools, as we don't cope - * well with them being evicted from here down. 
- */ - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer); - if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the * state flags which are generated and checked to help ensure @@ -233,14 +225,17 @@ void brw_validate_state( struct brw_context *brw ) assert(atom->dirty.mesa || atom->dirty.brw || - atom->dirty.cache); + atom->dirty.cache || + atom->always_update); assert(atom->update); - if (check_state(state, &atom->dirty)) { - brw->state.atoms[i]->update( brw ); + if (check_state(state, &atom->dirty) || atom->always_update) { + atom->update( brw ); /* emit_foo(brw); */ } + if (atom->emit_reloc != NULL) + atom->emit_reloc(brw); accumulate_state(&examined, &atom->dirty); @@ -255,8 +250,12 @@ void brw_validate_state( struct brw_context *brw ) } else { for (i = 0; i < Elements(atoms); i++) { - if (check_state(state, &brw->state.atoms[i]->dirty)) - brw->state.atoms[i]->update( brw ); + const struct brw_tracked_state *atom = brw->state.atoms[i]; + + if (check_state(state, &atom->dirty) || atom->always_update) + atom->update( brw ); + if (atom->emit_reloc != NULL) + atom->emit_reloc(brw); } } diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index d9e8896252..2b42918e15 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -35,6 +35,7 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" +#include "intel_context.h" #include "macros.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index d8cb168802..b9dc9ad180 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -49,7 +49,7 @@ #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" - +#include <stdarg.h> /* 
called from intelDestroyContext() @@ -97,7 +97,7 @@ static void brw_lost_hardware( struct intel_context *intel ) /* Which means there shouldn't be any commands already queued: */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + assert(intel->batch->ptr == intel->batch->map); brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; @@ -153,9 +153,6 @@ static GLuint brw_flush_cmd( void ) return *(GLuint *)&flush; } - - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -178,5 +175,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.emit_flush = brw_emit_flush; + brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 7856da0896..a9cdeb8874 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -34,25 +34,19 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "dri_bufmgr.h" #include "brw_wm.h" -#include "bufmgr.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ -static void invalidate_scratch_cb( struct intel_context *intel, - void *unused ) -{ - /* nothing */ -} - - static void upload_wm_unit(struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_wm_unit_state wm; GLuint max_threads; + GLuint per_thread; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) max_threads = 0; @@ -71,41 +65,37 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.max_threads = max_threads; + per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); + assert(per_thread <= 12 * 1024); + if (brw->wm.prog_data->total_scratch) { - GLuint per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); GLuint total = per_thread * 
(max_threads + 1); /* Scratch space -- just have to make sure there is sufficient * allocated for the active program and current number of threads. - */ - - if (!brw->wm.scratch_buffer) { - bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12); - bmBufferSetInvalidateCB(intel, - brw->wm.scratch_buffer, - invalidate_scratch_cb, - NULL, - GL_FALSE); + */ + brw->wm.scratch_buffer_size = total; + if (brw->wm.scratch_buffer && + brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; } - - if (total > brw->wm.scratch_buffer_size) { - brw->wm.scratch_buffer_size = total; - bmBufferData(intel, - brw->wm.scratch_buffer, - brw->wm.scratch_buffer_size, - NULL, - 0); + if (!brw->wm.scratch_buffer) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, + "wm scratch", + brw->wm.scratch_buffer_size, + 4096, DRM_BO_FLAG_MEM_TT); } - - assert(per_thread <= 12 * 1024); - wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; - - /* XXX: could make this dynamic as this is so rarely active: - */ - /* BRW_NEW_LOCK */ - wm.thread2.scratch_space_base_pointer = - bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10; } + /* XXX: Scratch buffers are not implemented correectly. + * + * The scratch offset to be programmed into wm is relative to the general + * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or + * the previous bmGenBuffers scheme), we get an offset relative to the + * start of framebuffer. Even before then, it was broken in other ways, + * so just fail for now if we hit that path. 
+ */ + assert(brw->wm.prog_data->total_scratch == 0); /* CACHE_NEW_SURFACE */ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; @@ -177,6 +167,19 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm4.stats_enable = 1; brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + + if (brw->wm.prog_data->total_scratch) { + /* + dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + (per_thread / 1024) - 1, + brw->wm.state_gs_offset + + ((char *)&wm.thread2 - (char *)&wm), + brw->wm.scratch_buffer); + */ + } else { + wm.thread2.scratch_space_base_pointer = 0; + } } const struct brw_tracked_state brw_wm_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 94d72839ff..2ade4eeae8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -142,7 +142,6 @@ static GLuint translate_tex_format( GLuint mesa_format ) static void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { - struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); @@ -159,9 +158,8 @@ void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - /* BRW_NEW_LOCK */ - surf.ss1.base_addr = bmBufferOffset(intel, - intelObj->mt->region->buffer); + /* Updated in emit_reloc */ + surf.ss1.base_addr = intelObj->mt->region->buffer->offset; surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; surf.ss2.width = firstImage->Width - 1; @@ -221,9 +219,8 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; surf.ss0.writedisable_alpha = 
!brw->attribs.Color->ColorMask[3]; - /* BRW_NEW_LOCK */ - surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer); - + /* Updated in emit_reloc */ + surf.ss1.base_addr = region->buffer->offset; surf.ss2.width = region->pitch - 1; /* XXX: not really! */ surf.ss2.height = region->height - 1; @@ -232,6 +229,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss3.pitch = (region->pitch * region->cpp) - 1; brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + brw->wm.nr_surfaces = 1; } @@ -262,14 +260,43 @@ static void upload_wm_surfaces(struct brw_context *brw ) &brw->wm.bind ); } +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ + int unit; + + /* Emit framebuffer relocation */ + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, + brw->wm.bind.surf_ss_offset[0] + + offsetof(struct brw_surface_state, ss1), + brw->state.draw_region->buffer); + + /* Emit relocations for texture buffers */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + brw->wm.bind.surf_ss_offset[unit + 1] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer); + } + } +} + const struct brw_tracked_state brw_wm_surfaces = { .dirty = { .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_LOCK), /* required for bmBufferOffset */ + .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_wm_surfaces + .update = upload_wm_surfaces, + .emit_reloc = emit_reloc_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/i965/bufmgr.h b/src/mesa/drivers/dri/i965/bufmgr.h 
deleted file mode 100644 index c7051b963f..0000000000 --- a/src/mesa/drivers/dri/i965/bufmgr.h +++ /dev/null @@ -1,186 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef BUFMGR_H -#define BUFMGR_H - -#include "intel_context.h" - - -/* The buffer manager context. Opaque. - */ -struct bufmgr; -struct buffer; - - -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); - -/* Flags for validate and other calls. If both NO_UPLOAD and NO_EVICT - * are specified, ValidateBuffers is essentially a query. 
- */ -#define BM_MEM_LOCAL 0x1 -#define BM_MEM_AGP 0x2 -#define BM_MEM_VRAM 0x4 /* not yet used */ -#define BM_WRITE 0x8 /* not yet used */ -#define BM_READ 0x10 /* not yet used */ -#define BM_NO_UPLOAD 0x20 -#define BM_NO_EVICT 0x40 -#define BM_NO_MOVE 0x80 /* not yet used */ -#define BM_NO_ALLOC 0x100 /* legacy "fixed" buffers only */ -#define BM_CLIENT 0x200 /* for map - pointer will be accessed - * without dri lock */ - -#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM) - - - - -/* Create a pool of a given memory type, from a certain offset and a - * certain size. - * - * Also passed in is a virtual pointer to the start of the pool. This - * is useful in the faked-out version in i915 so that MapBuffer can - * return a pointer to a buffer residing in AGP space. - * - * Flags passed into a pool are inherited by all buffers allocated in - * that pool. So pools representing the static front,back,depth - * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match - * the behaviour of the legacy allocations. - * - * Returns -1 for failure, pool number for success. - */ -int bmInitPool( struct intel_context *, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags); - - -/* Stick closely to ARB_vbo semantics - they're well defined and - * understood, and drivers can just pass the calls through without too - * much thunking. - */ -void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers, - int align ); -void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers); - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. - */ -struct buffer *bmGenBufferStatic(struct intel_context *, - unsigned pool); - -/* On evict, buffer manager will call invalidate_cb() to note that the - * buffer needs to be reloaded. 
- * - * Buffer is uploaded by calling bmMapBuffer() and copying data into - * the returned pointer. - * - * This is basically a big hack to get some more performance by - * turning off backing store for buffers where we either have it - * already (textures) or don't need it (batch buffers, temporary - * vbo's). - */ -void bmBufferSetInvalidateCB(struct intel_context *, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata); - - -/* The driver has more intimate knowledge of the hardare than a GL - * client would, so flags here is more proscriptive than the usage - * values in the ARB_vbo interface: - */ -int bmBufferData(struct intel_context *, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ); - -int bmBufferSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ); - -/* In this version, taking the offset will provoke an upload on - * buffers not already resident in AGP: - */ -unsigned bmBufferOffset(struct intel_context *, - struct buffer *buf); - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ); - -void *bmMapBuffer( struct intel_context *, - struct buffer *buf, - unsigned access ); - -void bmUnmapBuffer( struct intel_context *, - struct buffer *buf ); - -/* Pertains to all buffers who's offset has been taken since the last - * fence or release. 
- */ -int bmValidateBuffers( struct intel_context * ); -void bmReleaseBuffers( struct intel_context * ); - -GLuint bmCtxId( struct intel_context *intel ); - - -GLboolean bmError( struct intel_context * ); -void bmEvictAll( struct intel_context * ); - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ); - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context * ); -unsigned bmSetFenceLock( struct intel_context * ); -unsigned bmLockAndFence( struct intel_context *intel ); -int bmTestFence( struct intel_context *, unsigned fence ); -void bmFinishFence( struct intel_context *, unsigned fence ); -void bmFinishFenceLock( struct intel_context *, unsigned fence ); - -void bm_fake_NotifyContendedLockTake( struct intel_context * ); - -#endif diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c deleted file mode 100644 index 4315b272e4..0000000000 --- a/src/mesa/drivers/dri/i965/bufmgr_fake.c +++ /dev/null @@ -1,1361 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Originally a fake version of the buffer manager so that we can - * prototype the changes in a driver fairly quickly, has been fleshed - * out to a fully functional interim solution. - * - * Basically wraps the old style memory management in the new - * programming interface, but is more expressive and avoids many of - * the bugs in the old texture manager. 
- */ -#include "bufmgr.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" - -#include "simple_list.h" -#include "mm.h" -#include "imports.h" - -#define BM_POOL_MAX 8 - -/* Internal flags: - */ -#define BM_NO_BACKING_STORE 0x2000 -#define BM_NO_FENCE_SUBDATA 0x4000 - -#define FILE_DEBUG_FLAG DEBUG_BUFMGR - -static int check_fenced( struct intel_context *intel ); - -static int nr_attach = 0; - -/* Wrapper around mm.c's mem_block, which understands that you must - * wait for fences to expire before memory can be freed. This is - * specific to our use of memcpy for uploads - an upload that was - * processed through the command queue wouldn't need to care about - * fences. - */ -struct block { - struct block *next, *prev; - struct pool *pool; /* BM_MEM_AGP */ - struct mem_block *mem; /* BM_MEM_AGP */ - - unsigned referenced:1; - unsigned on_hardware:1; - unsigned fenced:1; - - - unsigned fence; /* BM_MEM_AGP, Split to read_fence, write_fence */ - - struct buffer *buf; - void *virtual; -}; - - -struct buffer { - unsigned id; /* debug only */ - const char *name; - unsigned size; - - unsigned mapped:1; - unsigned dirty:1; - unsigned alignment:13; - unsigned flags:16; - - struct block *block; - void *backing_store; - void (*invalidate_cb)( struct intel_context *, void * ); - void *invalidate_ptr; -}; - -struct pool { - unsigned size; - unsigned low_offset; - struct buffer *static_buffer; - unsigned flags; - struct mem_block *heap; - void *virtual; - struct block lru; /* only allocated, non-fence-pending blocks here */ -}; - -struct bufmgr { - _glthread_Mutex mutex; /**< for thread safety */ - struct pool pool[BM_POOL_MAX]; - unsigned nr_pools; - - unsigned buf_nr; /* for generating ids */ - - struct block referenced; /* after bmBufferOffset */ - struct block on_hardware; /* after bmValidateBuffers */ - struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ - /* then to pool->lru or free() */ - - unsigned 
ctxId; - unsigned last_fence; - unsigned free_on_hardware; - - unsigned fail:1; - unsigned need_fence:1; -}; - -#define MAXFENCE 0x7fffffff - -static GLboolean FENCE_LTE( unsigned a, unsigned b ) -{ - if (a == b) - return GL_TRUE; - - if (a < b && b - a < (1<<24)) - return GL_TRUE; - - if (a > b && MAXFENCE - a + b < (1<<24)) - return GL_TRUE; - - return GL_FALSE; -} - -int bmTestFence( struct intel_context *intel, unsigned fence ) -{ - /* Slight problem with wrap-around: - */ - return fence == 0 || FENCE_LTE(fence, intel->sarea->last_dispatch); -} - -#define LOCK(bm) \ - int dolock = nr_attach > 1; \ - if (dolock) _glthread_LOCK_MUTEX(bm->mutex) - -#define UNLOCK(bm) \ - if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex) - - - -static GLboolean alloc_from_pool( struct intel_context *intel, - unsigned pool_nr, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - struct pool *pool = &bm->pool[pool_nr]; - struct block *block = (struct block *)calloc(sizeof *block, 1); - GLuint sz, align = (1<<buf->alignment); - - if (!block) - return GL_FALSE; - - sz = ALIGN(buf->size, align); - - block->mem = mmAllocMem(pool->heap, - sz, - buf->alignment, 0); - if (!block->mem) { - free(block); - return GL_FALSE; - } - - make_empty_list(block); - - /* Insert at head or at tail??? 
- */ - insert_at_tail(&pool->lru, block); - - block->pool = pool; - block->virtual = pool->virtual + block->mem->ofs; - block->buf = buf; - - buf->block = block; - - return GL_TRUE; -} - - - - - - - - -/* Release the card storage associated with buf: - */ -static void free_block( struct intel_context *intel, struct block *block ) -{ - DBG("free block %p\n", block); - - if (!block) - return; - - check_fenced(intel); - - if (block->referenced) { - _mesa_printf("tried to free block on referenced list\n"); - assert(0); - } - else if (block->on_hardware) { - block->buf = NULL; - intel->bm->free_on_hardware += block->mem->size; - } - else if (block->fenced) { - block->buf = NULL; - } - else { - DBG(" - free immediately\n"); - remove_from_list(block); - - mmFreeMem(block->mem); - free(block); - } -} - - -static void alloc_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!buf->backing_store); - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - buf->backing_store = ALIGN_MALLOC(buf->size, 64); -} - -static void free_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - if (buf->backing_store) { - ALIGN_FREE(buf->backing_store); - buf->backing_store = NULL; - } -} - - - - - - -static void set_dirty( struct intel_context *intel, - struct buffer *buf ) -{ - if (buf->flags & BM_NO_BACKING_STORE) - buf->invalidate_cb(intel, buf->invalidate_ptr); - - assert(!(buf->flags & BM_NO_EVICT)); - - DBG("set_dirty - buf %d\n", buf->id); - buf->dirty = 1; -} - - -static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - if 
(block->fence && max_fence && - !FENCE_LTE(block->fence, max_fence)) - return 0; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -#define foreach_s_rev(ptr, t, list) \ - for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) - -static int evict_mru( struct intel_context *intel, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s_rev(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -static int check_fenced( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int ret = 0; - - foreach_s(block, tmp, &bm->fenced ) { - assert(block->fenced); - - if (bmTestFence(intel, block->fence)) { - - block->fenced = 0; - - if (!block->buf) { - DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size); - remove_from_list(block); - mmFreeMem(block->mem); - free(block); - } - else { - DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size); - move_to_tail(&block->pool->lru, block); - } - - ret = 1; - } - else { - /* Blocks are ordered by fence, so if one fails, all from - * here will fail also: - */ - break; - } - } - - /* Also check the referenced list: - */ - foreach_s(block, tmp, &bm->referenced ) { - if (block->fenced && - bmTestFence(intel, block->fence)) { - block->fenced = 0; - } - } - - - DBG("%s: %d\n", __FUNCTION__, ret); - return ret; -} - - - -static void fence_blocks( struct intel_context *intel, - unsigned fence ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - - foreach_s (block, tmp, 
&bm->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - block->fence = fence; - - block->on_hardware = 0; - block->fenced = 1; - - /* Move to tail of pending list here - */ - move_to_tail(&bm->fenced, block); - } - - /* Also check the referenced list: - */ - foreach_s (block, tmp, &bm->referenced) { - if (block->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - - block->fence = fence; - block->on_hardware = 0; - block->fenced = 1; - } - } - - - bm->last_fence = fence; - assert(is_empty_list(&bm->on_hardware)); -} - - - - -static GLboolean alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - int i; - - assert(intel->locked); - - DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_ALLOC) && - alloc_from_pool(intel, i, buf)) { - - DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__, - buf->block->mem->ofs, buf->block->mem->size); - - return GL_TRUE; - } - } - - DBG("%s --> fail\n", __FUNCTION__); - return GL_FALSE; -} - - -static GLboolean evict_and_alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - GLuint pool; - struct bufmgr *bm = intel->bm; - - assert(buf->block == NULL); - - /* Put a cap on the amount of free memory we'll allow to accumulate - * before emitting a fence. 
- */ - if (bm->free_on_hardware > 1 * 1024 * 1024) { - DBG("fence for free space: %x\n", bm->free_on_hardware); - bmSetFence(intel); - } - - /* Search for already free memory: - */ - if (alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory that may have become free: - */ - if (check_fenced(intel) && - alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory blocks not used for >1 frame: - */ - while (evict_lru(intel, intel->second_last_swap_fence, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - /* If we're not thrashing, allow lru eviction to dig deeper into - * recently used textures. We'll probably be thrashing soon: - */ - if (!intel->thrashing) { - while (evict_lru(intel, 0, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - } - - /* Keep thrashing counter alive? - */ - if (intel->thrashing) - intel->thrashing = 20; - - /* Wait on any already pending fences - here we are waiting for any - * freed memory that has been submitted to hardware and fenced to - * become available: - */ - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - - /* - */ - if (!is_empty_list(&bm->on_hardware)) { - bmSetFence(intel); - - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - } - - if (!intel->thrashing) { - DBG("thrashing\n"); - } - intel->thrashing = 20; - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - while (evict_mru(intel, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size); - - assert(is_empty_list(&bm->on_hardware)); - assert(is_empty_list(&bm->fenced)); - - return GL_FALSE; -} - - - - - - - - - - -/*********************************************************************** - * Public functions - */ - - -/* The initialization functions are skewed in the fake 
implementation. - * This call would be to attach to an existing manager, rather than to - * create a local one. - */ -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ) -{ - _glthread_DECLARE_STATIC_MUTEX(initMutex); - static struct bufmgr bm; - - /* This function needs a mutex of its own... - */ - _glthread_LOCK_MUTEX(initMutex); - - if (nr_attach == 0) { - _glthread_INIT_MUTEX(bm.mutex); - - make_empty_list(&bm.referenced); - make_empty_list(&bm.fenced); - make_empty_list(&bm.on_hardware); - - /* The context id of any of the share group. This won't be used - * in communication with the kernel, so it doesn't matter if - * this context is eventually deleted. - */ - bm.ctxId = intel->hHWContext; - } - - nr_attach++; - - _glthread_UNLOCK_MUTEX(initMutex); - - return &bm; -} - - - -/* The virtual pointer would go away in a true implementation. - */ -int bmInitPool( struct intel_context *intel, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - GLuint i; - - for (i = 0; i < bm->nr_pools; i++) { - if (bm->pool[i].low_offset == low_offset && - bm->pool[i].size == size) { - retval = i; - goto out; - } - } - - - if (bm->nr_pools >= BM_POOL_MAX) - retval = -1; - else { - i = bm->nr_pools++; - - DBG("bmInitPool %d low_offset %x sz %x\n", - i, low_offset, size); - - bm->pool[i].low_offset = low_offset; - bm->pool[i].size = size; - bm->pool[i].heap = mmInit( low_offset, size ); - bm->pool[i].virtual = low_virtual - low_offset; - bm->pool[i].flags = flags; - - make_empty_list(&bm->pool[i].lru); - - retval = i; - } - } - out: - UNLOCK(bm); - return retval; -} - -static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf = calloc(sizeof(*buf), 1); - - buf->id = ++bm->buf_nr; - buf->name = name; - buf->alignment = align; - buf->flags = 
BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL; - - return buf; -} - - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ) -{ - struct bufmgr *bm = intel->bm; - int i; - - for (i = 0; i < bm->nr_pools; i++) - if (offset >= bm->pool[i].low_offset && - offset + sz <= bm->pool[i].low_offset + bm->pool[i].size) - return bm->pool[i].virtual + offset; - - return NULL; -} - - -void bmGenBuffers(struct intel_context *intel, - const char *name, unsigned n, - struct buffer **buffers, - int align ) -{ - struct bufmgr *bm = intel->bm; - LOCK(bm); - { - int i; - - for (i = 0; i < n; i++) - buffers[i] = do_GenBuffer(intel, name, align); - } - UNLOCK(bm); -} - - -void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - unsigned i; - - for (i = 0; i < n; i++) { - struct buffer *buf = buffers[i]; - - if (buf && buf->block) - free_block(intel, buf->block); - - if (buf) - free(buf); - } - } - UNLOCK(bm); -} - - - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. It will probably - * be useful to pass a fixed offset here one day. 
- */ -struct buffer *bmGenBufferStatic(struct intel_context *intel, - unsigned pool ) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf; - LOCK(bm); - { - assert(bm->pool[pool].flags & BM_NO_EVICT); - assert(bm->pool[pool].flags & BM_NO_MOVE); - - if (bm->pool[pool].static_buffer) - buf = bm->pool[pool].static_buffer; - else { - buf = do_GenBuffer(intel, "static", 12); - - bm->pool[pool].static_buffer = buf; - assert(!buf->block); - - buf->size = bm->pool[pool].size; - buf->flags = bm->pool[pool].flags; - buf->alignment = 12; - - if (!alloc_from_pool(intel, pool, buf)) - assert(0); - } - } - UNLOCK(bm); - return buf; -} - - -static void wait_quiescent(struct intel_context *intel, - struct block *block) -{ - if (block->on_hardware) { - assert(intel->bm->need_fence); - bmSetFence(intel); - assert(!block->on_hardware); - } - - - if (block->fenced) { - bmFinishFence(intel, block->fence); - } - - assert(!block->on_hardware); - assert(!block->fenced); -} - - - -/* If buffer size changes, free and reallocate. Otherwise update in - * place. 
- */ -int bmBufferData(struct intel_context *intel, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data); - - assert(!buf->mapped); - - if (buf->block) { - struct block *block = buf->block; - - /* Optimistic check to see if we can reuse the block -- not - * required for correctness: - */ - if (block->fenced) - check_fenced(intel); - - if (block->on_hardware || - block->fenced || - (buf->size && buf->size != size) || - (data == NULL)) { - - assert(!block->referenced); - - free_block(intel, block); - buf->block = NULL; - buf->dirty = 1; - } - } - - buf->size = size; - if (buf->block) { - assert (buf->block->mem->size >= size); - } - - if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked || data == NULL); - - if (data != NULL) { - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - wait_quiescent(intel, buf->block); - - DBG("bmBufferData %d offset 0x%x sz 0x%x\n", - buf->id, buf->block->mem->ofs, size); - - assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs); - - do_memcpy(buf->block->virtual, data, size); - } - buf->dirty = 0; - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - free_backing_store(intel, buf); - - if (data != NULL) { - alloc_backing_store(intel, buf); - do_memcpy(buf->backing_store, data, size); - } - } - } - out: - UNLOCK(bm); - return retval; -} - - -/* Update the buffer in place, in whatever space it is currently resident: - */ -int bmBufferSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - if (size == 0) - return 0; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - 
assert(offset+size <= buf->size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->dirty = 0; - - do_memcpy(buf->block->virtual + offset, data, size); - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == NULL) - alloc_backing_store(intel, buf); - - do_memcpy(buf->backing_store + offset, data, size); - } - } - out: - UNLOCK(bm); - return retval; -} - -unsigned bmBufferOffset(struct intel_context *intel, - struct buffer *buf) -{ - struct bufmgr *bm = intel->bm; - unsigned retval = 0; - - LOCK(bm); - { - assert(intel->locked); - - if (!buf->block && - !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = ~0; - } - else { - assert(buf->block); - assert(buf->block->buf == buf); - - DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, buf->block, - buf->dirty); - - move_to_tail(&bm->referenced, buf->block); - buf->block->referenced = 1; - - retval = buf->block->mem->ofs; - } - } - UNLOCK(bm); - - return retval; -} - - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - if (buf->block && size) { - wait_quiescent(intel, buf->block); - do_memcpy(data, buf->block->virtual + offset, size); - } - } - else { - if (buf->backing_store && size) { - do_memcpy(data, buf->backing_store + offset, size); - } - } - } - UNLOCK(bm); -} - - -/* Return a pointer to whatever space the buffer is currently resident in: - */ -void *bmMapBuffer( struct intel_context 
*intel, - struct buffer *buf, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - void *retval = NULL; - - LOCK(bm); - { - DBG("bmMapBuffer %d\n", buf->id); - - if (buf->mapped) { - _mesa_printf("%s: already mapped\n", __FUNCTION__); - retval = NULL; - } - else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - DBG("%s: alloc failed\n", __FUNCTION__); - bm->fail = 1; - retval = NULL; - } - else { - assert(buf->block); - buf->dirty = 0; - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->mapped = 1; - retval = buf->block->virtual; - } - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == 0) - alloc_backing_store(intel, buf); - - buf->mapped = 1; - retval = buf->backing_store; - } - } - UNLOCK(bm); - return retval; -} - -void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmUnmapBuffer %d\n", buf->id); - buf->mapped = 0; - } - UNLOCK(bm); -} - - - - -/* This is the big hack that turns on BM_NO_BACKING_STORE. Basically - * says that an external party will maintain the backing store, eg - * Mesa's local copy of texture data. - */ -void bmBufferSetInvalidateCB(struct intel_context *intel, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - if (buf->backing_store) - free_backing_store(intel, buf); - - buf->flags |= BM_NO_BACKING_STORE; - - if (dont_fence_subdata) - buf->flags |= BM_NO_FENCE_SUBDATA; - - DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id); - buf->dirty = 1; - buf->invalidate_cb = invalidate_cb; - buf->invalidate_ptr = ptr; - - /* Note that it is invalid right from the start. 
Also note - * invalidate_cb is called with the bufmgr locked, so cannot - * itself make bufmgr calls. - */ - invalidate_cb( intel, ptr ); - } - UNLOCK(bm); -} - - - - - - - -/* This is only protected against thread interactions by the DRI lock - * and the policy of ensuring that all dma is flushed prior to - * releasing that lock. Otherwise you might have two threads building - * up a list of buffers to validate at once. - */ -int bmValidateBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("%s fail %d\n", __FUNCTION__, bm->fail); - assert(intel->locked); - - if (!bm->fail) { - struct block *block, *tmp; - - foreach_s(block, tmp, &bm->referenced) { - struct buffer *buf = block->buf; - - DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty); - - /* Upload the buffer contents if necessary: - */ - if (buf->dirty) { - DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id, - buf->name, buf->size, block->mem->ofs); - - assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT))); - - wait_quiescent(intel, buf->block); - - do_memcpy(buf->block->virtual, - buf->backing_store, - buf->size); - - buf->dirty = 0; - } - - block->referenced = 0; - block->on_hardware = 1; - move_to_tail(&bm->on_hardware, block); - } - - bm->need_fence = 1; - } - - retval = bm->fail ? -1 : 0; - } - UNLOCK(bm); - - - if (retval != 0) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - -void bmReleaseBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - - foreach_s (block, tmp, &bm->referenced) { - - DBG("remove block %p from referenced list\n", block); - - if (block->on_hardware) { - /* Return to the on-hardware list. - */ - move_to_tail(&bm->on_hardware, block); - } - else if (block->fenced) { - struct block *s; - - /* Hmm - have to scan the fenced list to insert the - * buffers in order. 
This is O(nm), but rare and the - * numbers are low. - */ - foreach (s, &bm->fenced) { - if (FENCE_LTE(block->fence, s->fence)) - break; - } - - move_to_tail(s, block); - } - else { - /* Return to the lru list: - */ - move_to_tail(&block->pool->lru, block); - } - - block->referenced = 0; - } - } - UNLOCK(bm); -} - - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context *intel ) -{ - assert(intel->locked); - - /* Emit MI_FLUSH here: - */ - if (intel->bm->need_fence) { - - /* Emit a flush without using a batchbuffer. Can't rely on the - * batchbuffer at this level really. Would really prefer that - * the IRQ ioctly emitted the flush at the same time. - */ - GLuint dword[2]; - dword[0] = intel->vtbl.flush_cmd(); - dword[1] = 0; - intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword)); - - intel->bm->last_fence = intelEmitIrqLocked( intel ); - - fence_blocks(intel, intel->bm->last_fence); - - intel->vtbl.note_fence(intel, intel->bm->last_fence); - intel->bm->need_fence = 0; - - if (intel->thrashing) { - intel->thrashing--; - if (!intel->thrashing) - DBG("not thrashing\n"); - } - - intel->bm->free_on_hardware = 0; - } - - return intel->bm->last_fence; -} - -unsigned bmSetFenceLock( struct intel_context *intel ) -{ - unsigned last; - LOCK(intel->bm); - last = bmSetFence(intel); - UNLOCK(intel->bm); - return last; -} -unsigned bmLockAndFence( struct intel_context *intel ) -{ - if (intel->bm->need_fence) { - LOCK_HARDWARE(intel); - LOCK(intel->bm); - bmSetFence(intel); - UNLOCK(intel->bm); - UNLOCK_HARDWARE(intel); - } - - return intel->bm->last_fence; -} - - -void bmFinishFence( struct intel_context *intel, unsigned fence ) -{ - if (!bmTestFence(intel, fence)) { - DBG("...wait on fence %d\n", fence); - intelWaitIrq( intel, fence ); - } - 
assert(bmTestFence(intel, fence)); - check_fenced(intel); -} - -void bmFinishFenceLock( struct intel_context *intel, unsigned fence ) -{ - LOCK(intel->bm); - bmFinishFence(intel, fence); - UNLOCK(intel->bm); -} - - -/* Specifically ignore texture memory sharing. - * -- just evict everything - * -- and wait for idle - */ -void bm_fake_NotifyContendedLockTake( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - } - } - } - } - UNLOCK(bm); -} - - - -void bmEvictAll( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - DBG("%s\n", __FUNCTION__); - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - } - } - } - } - UNLOCK(bm); -} - - -GLboolean bmError( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - GLboolean retval; - - LOCK(bm); - { - retval = bm->fail; - } - UNLOCK(bm); - - return retval; -} - - -GLuint bmCtxId( struct intel_context *intel ) -{ - return intel->bm->ctxId; -} diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
index ce0d4402b4..d38cdf31cc 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -1,236 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "imports.h" -#include "intel_batchbuffer.h" -#include "intel_ioctl.h" -#include "intel_decode.h" -#include "bufmgr.h" - -#define FILE_DEBUG_FLAG DEBUG_BATCH - -static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) -{ - assert(batch->map == NULL); - - batch->offset = (unsigned long)batch->ptr; - batch->offset = ALIGN(batch->offset, 64); - batch->ptr = (unsigned char *) batch->offset; - - if (BATCH_SZ - batch->offset < BATCH_REFILL) { - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - batch->offset = 0; - batch->ptr = NULL; - } - - batch->flags = 0; -} - -static void intel_batchbuffer_reset_cb( struct intel_context *intel, - void *ptr ) -{ - struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr; - assert(batch->map == NULL); - batch->flags = 0; - batch->offset = 0; - batch->ptr = NULL; -} - -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ) -{ - if (!batch->map) { - batch->map = bmMapBuffer(batch->intel, batch->buffer, - BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE); - batch->ptr += (unsigned long)batch->map; - } - - return batch->map; -} - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ) -{ - if (batch->map) { - batch->ptr -= (unsigned long)batch->map; - batch->map = NULL; - bmUnmapBuffer(batch->intel, batch->buffer); - } -} - - - -/*====================================================================== - * Public functions - */ -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ) -{ - struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); - - batch->intel = intel; - - bmGenBuffers(intel, "batch", 1, &batch->buffer, 12); - - bmBufferSetInvalidateCB(intel, batch->buffer, - intel_batchbuffer_reset_cb, - batch, - GL_TRUE); - - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - - - return batch; -} - -void intel_batchbuffer_free( struct 
intel_batchbuffer *batch ) -{ - if (batch->map) - bmUnmapBuffer(batch->intel, batch->buffer); - - bmDeleteBuffers(batch->intel, 1, &batch->buffer); - free(batch); -} - - -#define MI_BATCH_BUFFER_END (0xA<<23) - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ) -{ - struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - (batch->map + batch->offset); - GLuint offset; - GLint retval = GL_TRUE; - - assert(intel->locked); - - if (used == 0) { - bmReleaseBuffers( batch->intel ); - return GL_TRUE; - } - - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END; - batch->ptr += 4; - used += 4; - } - else { - ((int *)batch->ptr)[0] = 0; - ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END; - - batch->ptr += 8; - used += 8; - } - - intel_batchbuffer_unmap(batch); - - /* Get the batch buffer offset: Must call bmBufferOffset() before - * bmValidateBuffers(), otherwise the buffer won't be on the inuse - * list. 
- */ - offset = bmBufferOffset(batch->intel, batch->buffer); - - if (bmValidateBuffers( batch->intel ) != 0) { - assert(intel->locked); - bmReleaseBuffers( batch->intel ); - retval = GL_FALSE; - goto out; - } - - if (INTEL_DEBUG & DEBUG_BATCH) { - char *map; - - map = bmMapBuffer(batch->intel, batch->buffer, - BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT); - intel_decode((uint32_t *)(map + batch->offset), used / 4, - offset + batch->offset, intel->intelScreen->deviceID); - bmUnmapBuffer(batch->intel, batch->buffer); - } - - /* Fire the batch buffer, which was uploaded above: - */ - intel_batch_ioctl(batch->intel, - offset + batch->offset, - used); - - /* Reset the buffer: - */ - out: - intel_batchbuffer_reset( batch ); - intel_batchbuffer_map( batch ); - - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - - - - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ) -{ - unsigned long ptr = (unsigned long) batch->ptr; - unsigned long aptr = ALIGN(ptr, align); - GLuint fixup = aptr - ptr; - - if (intel_batchbuffer_space(batch) < fixup + sz) - intel_batchbuffer_flush(batch); - else { - memset(batch->ptr, 0, fixup); - batch->ptr += fixup; - } -} - - - - -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags) -{ - assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes, flags); - __memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; -} - +../intel/intel_batchbuffer.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h deleted file mode 100644 index c40cad9638..0000000000 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "mtypes.h" -#include "bufmgr.h" - -struct intel_context; - -#define BATCH_SZ (16 * 1024) -#define BATCH_REFILL 4096 -#define BATCH_RESERVED 16 - -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 - -struct intel_batchbuffer { - struct intel_context *intel; - - struct buffer *buffer; - - GLuint flags; - unsigned long offset; - - GLubyte *map; - GLubyte *ptr; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ); - -void intel_batchbuffer_free( struct intel_batchbuffer *batch ); - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ); - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ); -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ); - - -/* Unlike bmBufferData, this currently requires the buffer be mapped. - * Consider it a convenience function wrapping multple - * intel_buffer_dword() calls. - */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); - - -/* Inline functions - might actually be better off with these - * non-inlined. Certainly better off switching all command packets to - * be passed as structs rather than dwords, but that's a little bit of - * work... 
- */ -static inline GLuint -intel_batchbuffer_space( struct intel_batchbuffer *batch ) -{ - return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset)); -} - - -static inline void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, - GLuint dword) -{ - assert(batch->map); - assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *)(batch->ptr) = dword; - batch->ptr += 4; -} - -static inline void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - GLuint flags) -{ - assert(sz < BATCH_SZ - 8); - if (intel_batchbuffer_space(batch) < sz || - (batch->flags != 0 && flags != 0 && batch->flags != flags)) - intel_batchbuffer_flush(batch); - - batch->flags |= flags; -} - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ); - - -/* Here are the crusty old macros, to be removed: - */ -#define BATCH_LOCALS -#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) - -#define OUT_RELOC(buf, flags, delta) do { \ - assert((delta) >= 0); \ - OUT_BATCH(bmBufferOffset(intel, buf) + delta); \ -} while (0) - -#define ADVANCE_BATCH() do { } while(0) - - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 374022e64a..dd9c871902 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -41,7 +41,7 @@ #include "intel_regions.h" #include "intel_structs.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -64,8 +64,13 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv, intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; intelFlush( &intel->ctx ); - - bmFinishFenceLock(intel, intel->last_swap_fence); + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence 
= NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. @@ -151,9 +156,12 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv, } } - intel_batchbuffer_flush( intel->batch ); - intel->second_last_swap_fence = intel->last_swap_fence; - intel->last_swap_fence = bmSetFenceLock( intel ); + if (intel->first_swap_fence) + dri_fence_unreference(intel->first_swap_fence); + intel_batchbuffer_flush(intel->batch); + intel->first_swap_fence = intel->batch->last_fence; + if (intel->first_swap_fence != NULL) + dri_fence_reference(intel->first_swap_fence); UNLOCK_HARDWARE( intel ); if (!rect) @@ -176,7 +184,7 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv, void intelEmitFillBlit( struct intel_context *intel, GLuint cpp, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort x, GLshort y, @@ -247,11 +255,11 @@ static GLuint translate_raster_op(GLenum logicop) void intelEmitCopyBlit( struct intel_context *intel, GLuint cpp, GLshort src_pitch, - struct buffer *src_buffer, + dri_bo *src_buffer, GLuint src_offset, GLboolean src_tiled, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort src_x, GLshort src_y, @@ -524,7 +532,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLubyte *src_bits, GLuint src_size, GLuint fg_color, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort x, GLshort y, diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index 1412baf1c0..b7d556b1a9 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -31,8 +31,6 @@ #include "intel_context.h" #include "intel_ioctl.h" -struct buffer; - extern void intelCopyBuffer( __DRIdrawablePrivate 
*dpriv, const drm_clip_rect_t *rect ); extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask); @@ -40,11 +38,11 @@ extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask); extern void intelEmitCopyBlit( struct intel_context *intel, GLuint cpp, GLshort src_pitch, - struct buffer *src_buffer, + dri_bo *src_buffer, GLuint src_offset, GLboolean src_tiled, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort srcx, GLshort srcy, @@ -55,7 +53,7 @@ extern void intelEmitCopyBlit( struct intel_context *intel, extern void intelEmitFillBlit( struct intel_context *intel, GLuint cpp, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort x, GLshort y, @@ -68,7 +66,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLubyte *src_bits, GLuint src_size, GLuint fg_color, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, GLshort dst_x, GLshort dst_y, diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 3349284f5d..6ed31c9458 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -32,8 +32,17 @@ #include "intel_context.h" #include "intel_buffer_objects.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" +/** Allocates a new dri_bo to store the data for the buffer object. 
*/ +static void +intel_bufferobj_alloc_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + intel_obj->buffer = dri_bo_alloc(intel->intelScreen->bufmgr, "bufferobj", + intel_obj->Base.Size, 64, + DRM_BO_FLAG_MEM_TT); +} /** * There is some duplication between mesa's bufferobjects and our @@ -45,16 +54,10 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, GLuint name, GLenum target ) { - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object); + struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); _mesa_initialize_buffer_object(&obj->Base, name, target); - /* XXX: We generate our own handle, which is different to 'name' above. - */ - bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6); - assert(obj->buffer); - return &obj->Base; } @@ -66,14 +69,13 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, static void intel_bufferobj_free( GLcontext *ctx, struct gl_buffer_object *obj ) { - struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - if (intel_obj->buffer) - bmDeleteBuffers( intel, 1, &intel_obj->buffer ); - + if (intel_obj->buffer) + dri_bo_unreference(intel_obj->buffer); + _mesa_free(intel_obj); } @@ -103,7 +105,23 @@ static void intel_bufferobj_data( GLcontext *ctx, obj->Size = size; obj->Usage = usage; - bmBufferData(intel, intel_obj->buffer, size, data, 0); + /* While it would seem to make sense to always reallocate the buffer here, + * since it should allow us better concurrency between rendering and + * map-cpu write-unmap, doing so was a minor (~10%) performance loss + * for both classic and TTM mode with openarena. That may change with + * improved buffer manager algorithms. 
+ */ + if (intel_obj->buffer != NULL && intel_obj->buffer->size != size) { + dri_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + } + if (size != 0) { + if (intel_obj->buffer == NULL) + intel_bufferobj_alloc_buffer(intel, intel_obj); + + if (data != NULL) + dri_bo_subdata(intel_obj->buffer, 0, size, data); + } } @@ -120,11 +138,10 @@ static void intel_bufferobj_subdata( GLcontext *ctx, const GLvoid * data, struct gl_buffer_object * obj ) { - struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - bmBufferSubData(intel, intel_obj->buffer, offset, size, data); + dri_bo_subdata(intel_obj->buffer, offset, size, data); } @@ -138,11 +155,10 @@ static void intel_bufferobj_get_subdata( GLcontext *ctx, GLvoid * data, struct gl_buffer_object * obj ) { - struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data); + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); } @@ -155,14 +171,15 @@ static void *intel_bufferobj_map( GLcontext *ctx, GLenum access, struct gl_buffer_object *obj ) { - struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); /* XXX: Translate access to flags arg below: */ assert(intel_obj); assert(intel_obj->buffer); - obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0); + + dri_bo_map(intel_obj->buffer, GL_TRUE); + obj->Pointer = intel_obj->buffer->virtual; return obj->Pointer; } @@ -174,18 +191,17 @@ static GLboolean intel_bufferobj_unmap( GLcontext *ctx, GLenum target, struct gl_buffer_object *obj ) { - struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); assert(intel_obj->buffer); assert(obj->Pointer); - bmUnmapBuffer(intel, intel_obj->buffer); + 
dri_bo_unmap(intel_obj->buffer); obj->Pointer = NULL; return GL_TRUE; } -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) +dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) { assert(intel_obj->Base.Name); assert(intel_obj->buffer); diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h index 4b38803e57..a80f448716 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h @@ -39,13 +39,13 @@ struct gl_buffer_object; */ struct intel_buffer_object { struct gl_buffer_object Base; - struct buffer *buffer; /* the low-level buffer manager's buffer handle */ + dri_bo *buffer; /* the low-level buffer manager's buffer handle */ }; /* Get the bm buffer associated with a GL bufferobject: */ -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); +dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); /* Hook the bufferobject implementation into mesa: */ diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c new file mode 120000 index 0000000000..e9df5c6279 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_ttm.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index b825078761..73f4f96400 100644 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -58,8 +58,9 @@ #include "intel_blit.h" #include "intel_regions.h" #include "intel_buffer_objects.h" +#include "intel_decode.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" #include "utils.h" #include "vblank.h" @@ -267,14 +268,20 @@ void intelFlush( GLcontext *ctx ) { struct intel_context *intel = intel_context( ctx ); - bmLockAndFence(intel); + if (intel->batch->map != intel->batch->ptr) + intel_batchbuffer_flush(intel->batch); } void intelFinish( GLcontext *ctx ) { struct intel_context *intel = intel_context( ctx ); - bmFinishFence(intel, bmLockAndFence(intel)); + intelFlush(ctx); + if (intel->batch->last_fence) { + dri_fence_wait(intel->batch->last_fence); + dri_fence_unreference(intel->batch->last_fence); + intel->batch->last_fence = NULL; + } } static void @@ -333,7 +340,23 @@ void intelInitDriverFunctions( struct dd_function_table *functions ) intelInitBufferFuncs( functions ); } +static void +intel_update_screen_regions(struct intel_context *intel) +{ + intel->bufmgr = intel->intelScreen->bufmgr; + + intel_region_release(intel, &intel->front_region); + intel_region_reference(&intel->front_region, + intel->intelScreen->front_region); + intel_region_release(intel, &intel->back_region); + intel_region_reference(&intel->back_region, + intel->intelScreen->back_region); + + intel_region_release(intel, &intel->depth_region); + intel_region_reference(&intel->depth_region, + intel->intelScreen->depth_region); +} GLboolean intelInitContext( struct intel_context *intel, const __GLcontextModes *mesaVis, @@ -439,8 +462,6 @@ GLboolean intelInitContext( struct intel_context *intel, /* Initialize swrast, tnl driver tables: */ intelInitSpanFuncs( ctx ); - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - if 
(!intel->intelScreen->irq_active) { _mesa_printf("IRQs not active. Exiting\n"); exit(1); @@ -450,61 +471,12 @@ GLboolean intelInitContext( struct intel_context *intel, INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), debug_control ); + intel_update_screen_regions(intel); - /* Buffer manager: - */ - intel->bm = bm_fake_intel_Attach( intel ); - - - bmInitPool(intel, - intel->intelScreen->tex.offset, /* low offset */ - intel->intelScreen->tex.map, /* low virtual */ - intel->intelScreen->tex.size, - BM_MEM_AGP); - - /* These are still static, but create regions for them. - */ - intel->front_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->front.offset, - intelScreen->front.map, - intelScreen->cpp, - intelScreen->front.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->front.size, - intelScreen->front.tiled != 0); - - intel->back_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->back.offset, - intelScreen->back.map, - intelScreen->cpp, - intelScreen->back.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->back.size, - intelScreen->back.tiled != 0); - - /* Still assuming front.cpp == depth.cpp - * - * XXX: Setting tiling to false because Depth tiling only supports - * YMAJOR but the blitter only supports XMAJOR tiling. Have to - * resolve later. 
- */ - intel->depth_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->depth.offset, - intelScreen->depth.map, - intelScreen->cpp, - intelScreen->depth.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->depth.size, - intelScreen->depth.tiled != 0); - intel_bufferobj_init( intel ); intel->batch = intel_batchbuffer_alloc( intel ); + intel->last_swap_fence = NULL; + intel->first_swap_fence = NULL; if (intel->ctx.Mesa_DXTn) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); @@ -549,7 +521,17 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); intel->batch = NULL; - + + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + if (intel->first_swap_fence) { + dri_fence_wait(intel->first_swap_fence); + dri_fence_unreference(intel->first_swap_fence); + intel->first_swap_fence = NULL; + } if ( release_texture_heaps ) { /* This share group is about to go away, free our private @@ -628,7 +610,6 @@ static void intelContendedLock( struct intel_context *intel, GLuint flags ) __DRIscreenPrivate *sPriv = intel->driScreen; volatile drmI830Sarea * sarea = intel->sarea; int me = intel->hHWContext; - int my_bufmgr = bmCtxId(intel); drmGetLock(intel->driFd, intel->hHWContext, flags); @@ -655,16 +636,20 @@ static void intelContendedLock( struct intel_context *intel, GLuint flags ) intel->vtbl.lost_hardware( intel ); } - /* As above, but don't evict the texture data on transitions - * between contexts which all share a local buffer manager. + /* If the last consumer of the texture memory wasn't us, notify the fake + * bufmgr and record the new owner. We should have the memory shared + * between contexts of a single fake bufmgr, but this will at least make + * things correct for now. 
*/ - if (sarea->texAge != my_bufmgr) { + if (!intel->intelScreen->ttm && sarea->texAge != intel->hHWContext) { + sarea->texAge = intel->hHWContext; + dri_bufmgr_fake_contended_lock_take(intel->intelScreen->bufmgr); + if (INTEL_DEBUG & DEBUG_BATCH) + intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) { - fprintf(stderr, "Lost Textures: sarea->texAge %x my_bufmgr %x\n", - sarea->ctxOwner, my_bufmgr); + fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n", + sarea->ctxOwner, intel->hHWContext); } - sarea->texAge = my_bufmgr; - bm_fake_NotifyContendedLockTake( intel ); } /* Drawable changed? @@ -694,29 +679,6 @@ void LOCK_HARDWARE( struct intel_context *intel ) intel->locked = 1; - if (bmError(intel)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - } - - /* Make sure nothing has been emitted prior to getting the lock: - */ - assert(intel->batch->map == 0); - - /* XXX: postpone, may not be needed: - */ - if (!intel_batchbuffer_map(intel->batch)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - - /* This could only fail if the batchbuffer was greater in size - * than the available texture memory: - */ - if (!intel_batchbuffer_map(intel->batch)) { - _mesa_printf("double failure to map batchbuffer\n"); - assert(0); - } - } } @@ -724,11 +686,6 @@ void LOCK_HARDWARE( struct intel_context *intel ) */ void UNLOCK_HARDWARE( struct intel_context *intel ) { - /* Make sure everything has been released: - */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); - - intel_batchbuffer_unmap(intel->batch); intel->vtbl.note_unlock( intel ); intel->locked = 0; diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h index fa35f3fbe5..4aa9413bec 100644 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -151,17 +151,16 @@ struct intel_context GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1); - - + void (*debug_batch)(struct 
intel_context *intel); } vtbl; GLint refcount; GLuint Fallback; GLuint NewGLState; - GLuint last_swap_fence; - GLuint second_last_swap_fence; - + dri_fence *first_swap_fence; + dri_fence *last_swap_fence; + GLuint stats_wm; struct intel_batchbuffer *batch; @@ -178,29 +177,17 @@ struct intel_context GLboolean hw_stencil; GLboolean hw_stipple; GLboolean depth_buffer_is_float; - GLboolean no_hw; GLboolean no_rast; - GLboolean thrashing; GLboolean locked; GLboolean strict_conformance; GLboolean need_flush; - - - /* AGP memory buffer manager: - */ - struct bufmgr *bm; - - /* State for intelvb.c and inteltris.c. */ GLenum render_primitive; GLenum reduced_primitive; - struct intel_region *front_region; - struct intel_region *back_region; struct intel_region *draw_region; - struct intel_region *depth_region; /* These refer to the current draw (front vs. back) buffer: */ @@ -218,6 +205,12 @@ struct intel_context drmLock *driHwLock; int driFd; + /* Cached values from the screen private. */ + dri_bufmgr *bufmgr; + struct intel_region *front_region; + struct intel_region *back_region; + struct intel_region *depth_region; + __DRIdrawablePrivate *driDrawable; __DRIdrawablePrivate *driReadDrawable; __DRIscreenPrivate *driScreen; diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.c b/src/mesa/drivers/dri/i965/intel_ioctl.c index e7e736079f..50ad4e4f1f 100644 --- a/src/mesa/drivers/dri/i965/intel_ioctl.c +++ b/src/mesa/drivers/dri/i965/intel_ioctl.c @@ -41,43 +41,36 @@ #include "intel_blit.h" #include "intel_regions.h" #include "drm.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" +#include "intel_bufmgr_ttm.h" +#include "i915_drm.h" -static int intelWaitIdleLocked( struct intel_context *intel ) +static void intelWaitIdleLocked( struct intel_context *intel ) { - static int in_wait_idle = 0; unsigned int fence; - if (!in_wait_idle) { - if (INTEL_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "waiting for idle\n"); - } - - in_wait_idle = 1; - fence = bmSetFence(intel); - intelWaitIrq(intel, 
fence); - in_wait_idle = 0; + if (INTEL_DEBUG & DEBUG_SYNC) + fprintf(stderr, "waiting for idle\n"); - return bmTestFence(intel, fence); - } else { - return 1; - } + fence = intelEmitIrqLocked(intel->intelScreen); + intelWaitIrq(intel->intelScreen, fence); } -int intelEmitIrqLocked( struct intel_context *intel ) +int intelEmitIrqLocked( intelScreenPrivate *intelScreen ) { int seq = 1; - if (!intel->no_hw) { + if (!intelScreen->no_hw) { drmI830IrqEmit ie; int ret; - + /* assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == (DRM_LOCK_HELD|intel->hHWContext)); - + */ ie.irq_seq = &seq; - ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, + ret = drmCommandWriteRead( intelScreen->driScrnPriv->fd, + DRM_I830_IRQ_EMIT, &ie, sizeof(ie) ); if ( ret ) { fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret ); @@ -91,26 +84,32 @@ int intelEmitIrqLocked( struct intel_context *intel ) return seq; } -void intelWaitIrq( struct intel_context *intel, int seq ) +void intelWaitIrq( intelScreenPrivate *intelScreen, int seq ) { - if (!intel->no_hw) { + if (!intelScreen->no_hw) { drmI830IrqWait iw; int ret, lastdispatch; - + volatile drmI830Sarea *sarea = (volatile drmI830Sarea *) + (((GLubyte *)intelScreen->driScrnPriv->pSAREA) + + intelScreen->sarea_priv_offset); + if (0) fprintf(stderr, "%s %d\n", __FUNCTION__, seq ); iw.irq_seq = seq; do { - lastdispatch = intel->sarea->last_dispatch; - ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) ); + lastdispatch = sarea->last_dispatch; + ret = drmCommandWrite( intelScreen->driScrnPriv->fd, + DRM_I830_IRQ_WAIT, &iw, sizeof(iw) ); /* This seems quite often to return before it should!?! 
*/ - } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch) - || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24))); - + } while (ret == -EAGAIN || + ret == -EINTR || + (ret == -EBUSY && lastdispatch != sarea->last_dispatch) || + (ret == 0 && seq > sarea->last_dispatch) || + (ret == 0 && sarea->last_dispatch - seq >= (1 << 24))); if ( ret ) { fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret ); @@ -123,7 +122,9 @@ void intelWaitIrq( struct intel_context *intel, int seq ) void intel_batch_ioctl( struct intel_context *intel, GLuint start_offset, - GLuint used) + GLuint used, + GLboolean ignore_cliprects, + GLboolean allow_unlock ) { drmI830BatchBuffer batch; @@ -150,52 +151,68 @@ void intel_batch_ioctl( struct intel_context *intel, batch.start, batch.start + batch.used * 4); - if (!intel->no_hw) { + if (!intel->intelScreen->no_hw) { if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n", -errno); UNLOCK_HARDWARE(intel); exit(1); } - - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } } } -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used) +void +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence) { - drmI830CmdBuffer cmd; + struct drm_i915_execbuffer execbuf; + dri_fence *fo; assert(intel->locked); assert(used); - cmd.buf = buf; - cmd.sz = used; - cmd.cliprects = intel->pClipRects; - cmd.num_cliprects = 0; - cmd.DR1 = 0; - cmd.DR4 = 0; - - if (INTEL_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: 0x%x..0x%x\n", - __FUNCTION__, - 0, - 0 + cmd.sz); + if (*fence) { + dri_fence_unreference(*fence); + } - if (!intel->no_hw) { - if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, - sizeof(cmd))) { - fprintf(stderr, 
"DRM_I830_CMDBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } + memset(&execbuf, 0, sizeof(execbuf)); - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } + execbuf.num_buffers = count; + execbuf.batch.used = used; + execbuf.batch.cliprects = intel->pClipRects; + execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf.batch.DR1 = 0; + execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); + + execbuf.ops_list = (unsigned)start; // TODO + execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + + if (intel->intelScreen->no_hw) + return; + + if (drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, + sizeof(execbuf))) { + fprintf(stderr, "DRM_I830_EXECBUFFER: %d\n", -errno); + UNLOCK_HARDWARE(intel); + exit(1); + } + + + fo = intel_ttm_fence_create_from_arg(intel->intelScreen->bufmgr, "fence buffers", + &execbuf.fence_arg); + if (!fo) { + fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); + UNLOCK_HARDWARE(intel); + exit(1); } + *fence = fo; + + /* FIXME: use hardware contexts to avoid 'losing' hardware after + * each buffer flush. 
+ */ + intel->vtbl.lost_hardware(intel); + } diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.h b/src/mesa/drivers/dri/i965/intel_ioctl.h index df27659362..af74ed436e 100644 --- a/src/mesa/drivers/dri/i965/intel_ioctl.h +++ b/src/mesa/drivers/dri/i965/intel_ioctl.h @@ -30,15 +30,17 @@ #include "intel_context.h" -void intelWaitIrq( struct intel_context *intel, int seq ); -int intelEmitIrqLocked( struct intel_context *intel ); +void intelWaitIrq( intelScreenPrivate *intelScreen, int seq ); +int intelEmitIrqLocked( intelScreenPrivate *intelScreen ); void intel_batch_ioctl( struct intel_context *intel, GLuint start_offset, - GLuint used); - -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used); + GLuint used, + GLboolean ignore_cliprects, + GLboolean allow_unlock ); +void intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence); #endif diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 09c8510188..edca84c64e 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -28,7 +28,7 @@ #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" #include "enums.h" #include "imports.h" diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c index 9c92ab4777..64610e551e 100644 --- a/src/mesa/drivers/dri/i965/intel_regions.c +++ b/src/mesa/drivers/dri/i965/intel_regions.c @@ -42,7 +42,8 @@ #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" +#include "intel_bufmgr_ttm.h" #include "imports.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -53,9 +54,8 @@ GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *regi { DBG("%s\n", 
__FUNCTION__); if (!region->map_refcount++) { - region->map = bmMapBuffer(intel, region->buffer, 0); - if (!region->map) - region->map_refcount--; + dri_bo_map(region->buffer, GL_TRUE); + region->map = region->buffer->virtual; } return region->map; @@ -66,7 +66,7 @@ void intel_region_unmap(struct intel_context *intel, { DBG("%s\n", __FUNCTION__); if (!--region->map_refcount) { - bmUnmapBuffer(intel, region->buffer); + dri_bo_unmap(region->buffer); region->map = NULL; } } @@ -86,8 +86,8 @@ struct intel_region *intel_region_alloc( struct intel_context *intel, region->height = height; /* needed? */ region->refcount = 1; - bmGenBuffers(intel, "tex", 1, ®ion->buffer, 6); - bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0); + region->buffer = dri_bo_alloc(intel->intelScreen->bufmgr, "region", + pitch * cpp * height, 64, DRM_BO_FLAG_MEM_TT); return region; } @@ -110,25 +110,23 @@ void intel_region_release( struct intel_context *intel, if (--(*region)->refcount == 0) { assert((*region)->map_refcount == 0); - bmDeleteBuffers(intel, 1, &(*region)->buffer); + dri_bo_unreference((*region)->buffer); free(*region); } *region = NULL; } -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ) +struct intel_region *intel_region_create_static(intelScreenPrivate *intelScreen, + char *name, + GLuint mem_type, + unsigned int bo_handle, + GLuint offset, + void *virtual, + GLuint cpp, GLuint pitch, + GLuint height, GLboolean tiled) { struct intel_region *region = calloc(sizeof(*region), 1); - GLint pool; DBG("%s\n", __FUNCTION__); @@ -138,27 +136,58 @@ struct intel_region *intel_region_create_static( struct intel_context *intel, region->refcount = 1; region->tiled = tiled; - /* Recipe for creating a static buffer - create a static pool with - * the right offset and size, generate a buffer and use a special - * call 
to bind it to all of the memory in that pool. - */ - pool = bmInitPool(intel, offset, virtual, size, - (BM_MEM_AGP | - BM_NO_UPLOAD | - BM_NO_EVICT | - BM_NO_MOVE)); - if (pool < 0) { - _mesa_printf("bmInitPool failed for static region\n"); - exit(1); + if (intelScreen->ttm) { + assert(bo_handle != -1); + region->buffer = intel_ttm_bo_create_from_handle(intelScreen->bufmgr, + name, + bo_handle); + } else { + region->buffer = dri_bo_alloc_static(intelScreen->bufmgr, + name, + offset, pitch * cpp * height, + virtual, + DRM_BO_FLAG_MEM_TT); } - region->buffer = bmGenBufferStatic(intel, pool); - return region; } +void +intel_region_update_static(intelScreenPrivate *intelScreen, + struct intel_region *region, + GLuint mem_type, + unsigned int bo_handle, + GLuint offset, + void *virtual, + GLuint cpp, GLuint pitch, GLuint height, + GLboolean tiled) +{ + DBG("%s\n", __FUNCTION__); + region->cpp = cpp; + region->pitch = pitch; + region->height = height; /* needed? */ + region->tiled = tiled; + /* + * We use a "shared" buffer type to indicate buffers created and + * shared by others. 
+ */ + + dri_bo_unreference(region->buffer); + if (intelScreen->ttm) { + assert(bo_handle != -1); + region->buffer = intel_ttm_bo_create_from_handle(intelScreen->bufmgr, + "static region", + bo_handle); + } else { + region->buffer = dri_bo_alloc_static(intelScreen->bufmgr, + "static region", + offset, pitch * cpp * height, + virtual, + DRM_BO_FLAG_MEM_TT); + } +} void _mesa_copy_rect( GLubyte *dst, GLuint cpp, @@ -212,41 +241,17 @@ GLboolean intel_region_data(struct intel_context *intel, { DBG("%s\n", __FUNCTION__); - if (width == dst->pitch && - width == src_pitch && - dst_offset == 0 && - height == dst->height && - srcx == 0 && - srcy == 0) - { - return (bmBufferData(intel, - dst->buffer, - dst->cpp * width * dst->height, - src, 0) == 0); - } - else { - GLubyte *map = intel_region_map(intel, dst); - - if (map) { - assert (dst_offset + dstx + width + - (dsty + height - 1) * dst->pitch * dst->cpp <= - dst->pitch * dst->cpp * dst->height); - - _mesa_copy_rect(map + dst_offset, - dst->cpp, - dst->pitch, - dstx, dsty, - width, height, - src, - src_pitch, - srcx, srcy); - - intel_region_unmap(intel, dst); - return GL_TRUE; - } - else - return GL_FALSE; - } + assert (dst_offset + dstx + width + + (dsty + height - 1) * dst->pitch * dst->cpp <= + dst->pitch * dst->cpp * dst->height); + + _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset, + dst->cpp, + dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + intel_region_unmap(intel, dst); + + return GL_TRUE; } /* Copy rectangular sub-regions. Need better logic about when to diff --git a/src/mesa/drivers/dri/i965/intel_regions.h b/src/mesa/drivers/dri/i965/intel_regions.h index d2235f1275..985102cc18 100644 --- a/src/mesa/drivers/dri/i965/intel_regions.h +++ b/src/mesa/drivers/dri/i965/intel_regions.h @@ -29,7 +29,8 @@ #define INTEL_REGIONS_H #include "mtypes.h" -#include "bufmgr.h" /* for DBG! */ +#include "dri_bufmgr.h" /* for DBG! 
*/ +#include "intel_screen.h" struct intel_context; /* A layer on top of the bufmgr buffers that adds a few useful things: @@ -40,7 +41,7 @@ struct intel_context; * - Blitter commands for copying 2D regions between buffers. */ struct intel_region { - struct buffer *buffer; + dri_bo *buffer; GLuint refcount; GLuint cpp; GLuint pitch; @@ -71,15 +72,24 @@ void intel_region_release(struct intel_context *intel, * buffers are within those zones. Tiling regions without fence * registers is more work. */ -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ); +struct intel_region * +intel_region_create_static(intelScreenPrivate *intelScreen, + char *name, + GLuint mem_type, + unsigned int bo_handle, + GLuint offset, + void *virtual, + GLuint cpp, + GLuint pitch, GLuint height, GLboolean tiled); +void +intel_region_update_static(intelScreenPrivate *intelScreen, + struct intel_region *region, + GLuint mem_type, + unsigned int bo_handle, + GLuint offset, + void *virtual, + GLuint cpp, GLuint pitch, GLuint height, + GLboolean tiled); /* Map/unmap regions. This is refcounted also: */ diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 77fd9e386a..975c647e55 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -42,7 +42,10 @@ #include "intel_tex.h" #include "intel_span.h" #include "intel_ioctl.h" +#include "intel_regions.h" +#include "intel_bufmgr_ttm.h" +#include "i915_drm.h" #include "i830_dri.h" PUBLIC const char __driConfigOptions[] = @@ -126,6 +129,107 @@ intelMapScreenRegions(__DRIscreenPrivate *sPriv) return GL_TRUE; } +/** Driver-specific fence emit implementation for the fake memory manager. 
*/ +static unsigned int +intel_fence_emit(void *private) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *)private; + unsigned int fence; + + /* XXX: Need to emit a flush, if we haven't already (at least with the + * current batchbuffer implementation, we have). + */ + + fence = intelEmitIrqLocked(intelScreen); + + return fence; +} + +/** Driver-specific fence wait implementation for the fake memory manager. */ +static int +intel_fence_wait(void *private, unsigned int cookie) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *)private; + + intelWaitIrq(intelScreen, cookie); + + return 0; +} + +static struct intel_region * +intel_recreate_static(intelScreenPrivate *intelScreen, + char *name, struct intel_region *region, + intelRegion *region_desc, + GLuint mem_type) +{ + if (region) { + intel_region_update_static(intelScreen, region, mem_type, + region_desc->bo_handle, region_desc->offset, + region_desc->map, intelScreen->cpp, + region_desc->pitch / intelScreen->cpp, + intelScreen->height, region_desc->tiled); + } else { + region = intel_region_create_static(intelScreen, name, mem_type, + region_desc->bo_handle, + region_desc->offset, + region_desc->map, intelScreen->cpp, + region_desc->pitch / intelScreen->cpp, + intelScreen->height, + region_desc->tiled); + } + + assert(region->buffer != NULL); + + return region; +} + + +/* Create intel_region structs to describe the static front,back,depth + * buffers created by the xserver. + * + * Although FBO's mean we now no longer use these as render targets in + * all circumstances, they won't go away until the back and depth + * buffers become private, and the front and rotated buffers will + * remain even then. + * + * Note that these don't allocate video memory, just describe + * allocations alread made by the X server. 
+ */ +static void +intel_recreate_static_regions(intelScreenPrivate *intelScreen) +{ + intelScreen->front_region = + intel_recreate_static(intelScreen, "front", + intelScreen->front_region, + &intelScreen->front, + DRM_BO_FLAG_MEM_TT); + + /* The rotated region is only used for old DDXes that didn't handle rotation + * on their own. + */ + if (intelScreen->driScrnPriv->ddx_version.minor < 8) { + intelScreen->rotated_region = + intel_recreate_static(intelScreen, "rotated", + intelScreen->rotated_region, + &intelScreen->rotated, + DRM_BO_FLAG_MEM_TT); + } + + intelScreen->back_region = + intel_recreate_static(intelScreen, "back", + intelScreen->back_region, + &intelScreen->back, + DRM_BO_FLAG_MEM_TT); + + /* Still assumes front.cpp == depth.cpp. We can kill this when we move to + * private buffers. + */ + intelScreen->depth_region = + intel_recreate_static(intelScreen, "depth", + intelScreen->depth_region, + &intelScreen->depth, + DRM_BO_FLAG_MEM_TT); +} void intelUnmapScreenRegions(intelScreenPrivate *intelScreen) @@ -243,6 +347,16 @@ intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, intelScreen->depth.size = sarea->depth_size; intelScreen->depth.tiled = sarea->depth_tiled; + if (intelScreen->driScrnPriv->ddx_version.minor >= 9) { + intelScreen->front.bo_handle = sarea->front_bo_handle; + intelScreen->back.bo_handle = sarea->back_bo_handle; + intelScreen->depth.bo_handle = sarea->depth_bo_handle; + } else { + intelScreen->front.bo_handle = -1; + intelScreen->back.bo_handle = -1; + intelScreen->depth.bo_handle = -1; + } + intelScreen->tex.offset = sarea->tex_offset; intelScreen->logTextureGranularity = sarea->log_tex_granularity; intelScreen->tex.handle = sarea->tex_handle; @@ -301,6 +415,7 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); intelScreen->deviceID = gDRIPriv->deviceID; + intelScreen->maxBatchSize = 16 * 1024; intelScreen->mem = gDRIPriv->mem; intelScreen->cpp = 
gDRIPriv->cpp; @@ -357,7 +472,40 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) } sPriv->extensions = intelExtensions; - + + if (getenv("INTEL_NO_TTM") == NULL && + intelScreen->driScrnPriv->ddx_version.minor >= 9 && + intelScreen->drmMinor >= 11 && + intelScreen->front.bo_handle != -1) { + intelScreen->bufmgr = intel_bufmgr_ttm_init(sPriv->fd, + DRM_FENCE_TYPE_EXE, + DRM_FENCE_TYPE_EXE | + DRM_I915_FENCE_TYPE_RW, + intelScreen->maxBatchSize); + if (intelScreen->bufmgr != NULL) + intelScreen->ttm = GL_TRUE; + } + /* Otherwise, use the classic buffer manager. */ + if (intelScreen->bufmgr == NULL) { + if (intelScreen->tex.size == 0) { + fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", + __func__, __LINE__); + return GL_FALSE; + } + fprintf(stderr, "[%s:%u] Failed to init TTM buffer manager, falling back" + " to classic.\n", __func__, __LINE__); + intelScreen->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intelScreen); + } + + intel_recreate_static_regions(intelScreen); + + intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index bf9a716082..9d6c9dedbf 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -30,6 +30,7 @@ #include <sys/time.h> #include "dri_util.h" +#include "dri_bufmgr.h" #include "xmlconfig.h" #include "i830_common.h" @@ -42,6 +43,7 @@ typedef struct { char *map; /* memory map */ int offset; /* from start of video mem, in bytes */ int pitch; /* row stride, in pixels */ + unsigned int bo_handle; unsigned int tiled; } intelRegion; @@ -52,7 +54,12 @@ typedef struct intelRegion rotated; intelRegion depth; intelRegion tex; - + + struct intel_region *front_region; + struct intel_region *back_region; + struct intel_region *depth_region; + struct intel_region 
*rotated_region; + int deviceID; int width; int height; @@ -76,10 +83,23 @@ typedef struct int current_rotation; /* 0, 90, 180 or 270 */ int rotatedWidth, rotatedHeight; + GLboolean no_hw; + /** * Configuration cache with default values for all contexts */ driOptionCache optionCache; + + dri_bufmgr *bufmgr; + unsigned int maxBatchSize; + + /** + * This value indicates that the kernel memory manager is being used + * instead of the fake client-side memory manager. + */ + GLboolean ttm; + + unsigned batch_id; } intelScreenPrivate; diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index bd59c84dbd..4c8afd99da 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -31,7 +31,7 @@ #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" /** * Compute which mipmap levels that really need to be sent to the hardware. @@ -116,11 +116,13 @@ static void intel_texture_invalidate( struct intel_texture_object *intelObj ) intelObj->dirty_images[face] = ~0; } +#if 0 static void intel_texture_invalidate_cb( struct intel_context *intel, void *ptr ) { intel_texture_invalidate( (struct intel_texture_object *) ptr ); } +#endif /* */ @@ -207,7 +209,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, firstImage->Depth, cpp, firstImage->IsCompressed); - +#if 0 /* Tell the buffer manager that we will manage the backing * store, but we still want it to do fencing for us. 
*/ @@ -216,6 +218,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, intel_texture_invalidate_cb, intelObj, GL_FALSE); +#endif } /* Pull in any images not in the object's tree: diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b51086a638..21db0e7dcd 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -175,6 +175,9 @@ do_flush_locked(struct intel_batchbuffer *batch, intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, intel->intelScreen->deviceID); dri_bo_unmap(batch->buf); + + if (intel->vtbl.debug_batch != NULL) + intel->vtbl.debug_batch(intel); } } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 1bbbbde293..1aa86ae3f0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -95,7 +95,6 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define BATCH_LOCALS #define BEGIN_BATCH(n, flags) do { \ - assert(!intel->prim.flush); \ intel_batchbuffer_require_space(intel->batch, (n)*4, flags); \ } while (0) |