From 7c81124d7c4a4d1da9f48cbf7e82ab1a3a970a7a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 24 Aug 2008 17:52:40 +1000 Subject: Revert "Merge branch 'drm-gem'" This reverts commit 53675e5c05c0598b7ea206d5c27dbcae786a2c03. Conflicts: src/mesa/drivers/dri/i965/brw_wm_surface_state.c --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 134 ++- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 24 +- src/mesa/drivers/dri/intel/intel_blit.c | 113 +-- src/mesa/drivers/dri/intel/intel_blit.h | 8 +- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 4 +- src/mesa/drivers/dri/intel/intel_buffers.c | 2 - src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 1122 +++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h | 28 + src/mesa/drivers/dri/intel/intel_context.c | 111 +- src/mesa/drivers/dri/intel/intel_context.h | 14 +- src/mesa/drivers/dri/intel/intel_decode.c | 22 +- src/mesa/drivers/dri/intel/intel_depthstencil.c | 80 +- src/mesa/drivers/dri/intel/intel_fbo.c | 20 +- src/mesa/drivers/dri/intel/intel_fbo.h | 6 +- src/mesa/drivers/dri/intel/intel_ioctl.c | 82 +- src/mesa/drivers/dri/intel/intel_ioctl.h | 16 +- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 7 +- src/mesa/drivers/dri/intel/intel_pixel_copy.c | 10 +- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 16 +- src/mesa/drivers/dri/intel/intel_reg.h | 129 --- src/mesa/drivers/dri/intel/intel_regions.c | 162 +-- src/mesa/drivers/dri/intel/intel_regions.h | 13 +- src/mesa/drivers/dri/intel/intel_screen.c | 115 ++- src/mesa/drivers/dri/intel/intel_screen.h | 2 - src/mesa/drivers/dri/intel/intel_span.c | 676 +++---------- src/mesa/drivers/dri/intel/intel_span.h | 6 +- src/mesa/drivers/dri/intel/intel_tex_copy.c | 6 +- src/mesa/drivers/dri/intel/intel_tex_image.c | 23 +- src/mesa/drivers/dri/intel/intel_tex_validate.c | 14 + 29 files changed, 1803 insertions(+), 1162 deletions(-) create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h (limited to 'src/mesa/drivers/dri/intel') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 5afaad070c..a594fb6cc4 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -29,7 +29,6 @@ #include "intel_ioctl.h" #include "intel_decode.h" #include "intel_reg.h" -#include "intel_bufmgr.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -79,21 +78,19 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } - if (!batch->buffer && intel->ttm == GL_TRUE) - batch->buffer = malloc (intel->maxBatchSize); - batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096); - if (batch->buffer) - batch->map = batch->buffer; - else { - dri_bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; - } + intel->maxBatchSize, 4096, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; batch->cliprect_mode = IGNORE_CLIPRECTS; + + /* account batchbuffer in aperture */ + dri_bufmgr_check_aperture_space(batch->buf); + } struct intel_batchbuffer * @@ -102,6 +99,7 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; + batch->last_fence = NULL; intel_batchbuffer_reset(batch); return batch; @@ -110,13 +108,14 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->buffer) - free (batch->buffer); - else { - if (batch->map) { - dri_bo_unmap(batch->buf); - batch->map = NULL; - } + if (batch->last_fence) { + dri_fence_wait(batch->last_fence); + dri_fence_unreference(batch->last_fence); + batch->last_fence = NULL; + } + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; } dri_bo_unreference(batch->buf); batch->buf = NULL; @@ -132,12 +131,11 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - int ret = 0; + void *start; + GLuint count; - if (batch->buffer) - dri_bo_subdata (batch->buf, 0, used, batch->buffer); - else - dri_bo_unmap(batch->buf); + dri_bo_unmap(batch->buf); + start = dri_process_relocs(batch->buf, &count); batch->map = NULL; batch->ptr = NULL; @@ -150,25 +148,21 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { - struct drm_i915_gem_execbuffer *execbuf; - - execbuf = dri_process_relocs(batch->buf); - ret = intel_exec_ioctl(batch->intel, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock, - execbuf); + intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + start, count, &batch->last_fence); } else { - dri_process_relocs(batch->buf); - ret = intel_batch_ioctl(batch->intel, - batch->buf->offset, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock); + intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); } } - - dri_post_submit(batch->buf); + + dri_post_submit(batch->buf, &batch->last_fence); if (intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS) { @@ -193,10 +187,6 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.debug_batch(intel); } - if (ret != 0) { - UNLOCK_HARDWARE(intel); - exit(1); - } intel->vtbl.new_batch(intel); } @@ -214,27 +204,21 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); - - /* Emit a flush if the bufmgr doesn't do it for us. */ - if (!intel->ttm) { - *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); - batch->ptr += 4; - used = batch->ptr - batch->map; + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ + if (used & 4) { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = 0; + ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; + used += 12; } - - /* Round batchbuffer usage to 2 DWORDs. */ - - if ((used & 4) == 0) { - *(GLuint *) (batch->ptr) = 0; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; + else { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; + used += 8; } - /* Mark the end of the buffer. */ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; - /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a * flush. What happens is that when we try to grab the lock for @@ -246,9 +230,6 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, * avoid that in the first place. */ batch->ptr = batch->map; - if (intel->vtbl.finish_batch) - intel->vtbl.finish_batch(intel); - /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ @@ -261,13 +242,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { - int irq; - fprintf(stderr, "waiting for idle\n"); - LOCK_HARDWARE(intel); - irq = intelEmitIrqLocked(intel); - UNLOCK_HARDWARE(intel); - intelWaitIrq(intel, irq); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); } /* Reset the buffer: @@ -275,22 +252,25 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, intel_batchbuffer_reset(batch); } +void +intel_batchbuffer_finish(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_flush(batch); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); +} + /* This is the only way buffers get added to the validate list. */ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta) + GLuint flags, GLuint delta) { int ret; - if (batch->ptr - batch->map > batch->buf->size) - _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain, - delta, batch->ptr - batch->map, buffer); + ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 52d6ecc223..0da602010e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -4,7 +4,6 @@ #include "mtypes.h" #include "dri_bufmgr.h" -#include "intel_reg.h" struct intel_context; @@ -41,8 +40,7 @@ struct intel_batchbuffer struct intel_context *intel; dri_bo *buf; - - GLubyte *buffer; + dri_fence *last_fence; GLubyte *map; GLubyte *ptr; @@ -60,6 +58,8 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context void intel_batchbuffer_free(struct intel_batchbuffer *batch); +void intel_batchbuffer_finish(struct intel_batchbuffer *batch); + void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line); @@ -82,16 +82,14 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); + GLuint flags, GLuint offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of * work... */ -static INLINE GLint +static INLINE GLuint intel_batchbuffer_space(struct intel_batchbuffer *batch) { return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); @@ -138,20 +136,12 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ +#define OUT_RELOC(buf, cliprect_mode, delta) do { \ assert((delta) >= 0); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ - read_domains, write_domain, delta); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ } while (0) #define ADVANCE_BATCH() do { } while(0) -static INLINE void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); - intel_batchbuffer_emit_dword(batch, MI_FLUSH); -} - #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 7129a4ba91..25ac609f13 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -54,6 +54,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, struct intel_context *intel; const intelScreenPrivate *intelScreen; + int ret; DBG("%s\n", __FUNCTION__); @@ -65,6 +66,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, intelScreen = intel->intelScreen; + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. */ @@ -80,7 +89,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, unsigned short src_x, src_y; int BR13, CMD; int i; - dri_bo *aper_array[3]; src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); @@ -106,28 +114,26 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #ifndef I915 - if (src->tiling != I915_TILING_NONE) { + if (src->tiled) { CMD |= XY_SRC_TILED; src_pitch /= 4; } - if (dst->tiling != I915_TILING_NONE) { + if (dst->tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } #endif /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, - REFERENCES_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS); again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst->buffer; - aper_array[2] = src->buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + ret = dri_bufmgr_check_aperture_space(dst->buffer); + ret |= dri_bufmgr_check_aperture_space(src->buffer); + + if (ret) { intel_batchbuffer_flush(intel->batch); goto again; } - + for (i = 0; i < nbox; i++, pbox++) { drm_clip_rect_t box = *pbox; @@ -151,22 +157,19 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, - I915_GEM_DOMAIN_RENDER, 0, - 0); + OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); ADVANCE_BATCH(); } - /* Flush the rendering and the batch so that the results all land on the - * screen in a timely fashion. - */ - intel_batchbuffer_emit_mi_flush(intel->batch); + if (intel->first_swap_fence) + dri_fence_unreference(intel->first_swap_fence); intel_batchbuffer_flush(intel->batch); + intel->first_swap_fence = intel->batch->last_fence; + if (intel->first_swap_fence) + dri_fence_reference(intel->first_swap_fence); } UNLOCK_HARDWARE(intel); @@ -181,7 +184,7 @@ intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color) @@ -206,7 +209,7 @@ intelEmitFillBlit(struct intel_context *intel, return; } #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } @@ -223,9 +226,7 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); } @@ -262,11 +263,11 @@ intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - uint32_t src_tiling, + GLboolean src_tiled, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -275,19 +276,17 @@ intelEmitCopyBlit(struct intel_context *intel, GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - dri_bo *aper_array[3]; + int ret; BATCH_LOCALS; /* do space/cliprects check before going any further */ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(intel->batch); - goto again; + ret = dri_bufmgr_check_aperture_space(dst_buffer); + ret |= dri_bufmgr_check_aperture_space(src_buffer); + if (ret) { + intel_batchbuffer_flush(intel->batch); + goto again; } DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", @@ -316,11 +315,11 @@ intelEmitCopyBlit(struct intel_context *intel, } #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { CMD |= XY_DST_TILED; dst_pitch /= 4; } - if (src_tiling != I915_TILING_NONE) { + if (src_tiled) { CMD |= XY_SRC_TILED; src_pitch /= 4; } @@ -346,13 +345,11 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset); ADVANCE_BATCH(); } @@ -365,17 +362,14 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } - intel_batchbuffer_emit_mi_flush(intel->batch); } @@ -519,7 +513,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiling != I915_TILING_NONE) { + if (irb_region->tiled) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -547,8 +541,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -556,7 +549,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } } - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -570,7 +563,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op) @@ -594,13 +587,13 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, (8 * 4) + (3 * 4) + dwords, - REFERENCES_CLIPRECTS ); + NO_LOOP_CLIPRECTS ); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; #ifndef I915 - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiled) { opcode |= XY_DST_TILED; dst_pitch /= 4; } @@ -613,17 +606,15 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, br13 |= BR13_8888; blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiling != I915_TILING_NONE) + if (dst_tiled) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); + BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ @@ -636,7 +627,5 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_data( intel->batch, src_bits, dwords * 4, - REFERENCES_CLIPRECTS ); - - intel_batchbuffer_emit_mi_flush(intel->batch); + NO_LOOP_CLIPRECTS ); } diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 0881cc4fdc..fc0620caba 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -42,11 +42,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - uint32_t src_tiling, + GLboolean src_tiled, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, GLshort w, GLshort h, @@ -57,7 +57,7 @@ extern void intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color); @@ -69,7 +69,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + GLboolean dst_tiled, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 1923a21516..951b8cbfb7 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -32,7 +32,6 @@ #include "intel_context.h" #include "intel_buffer_objects.h" -#include "intel_batchbuffer.h" #include "intel_regions.h" #include "dri_bufmgr.h" @@ -46,7 +45,8 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, struct intel_buffer_object *intel_obj) { intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64); + intel_obj->Base.Size, 64, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); } /** diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 77352e6a53..75542a9775 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -819,8 +819,6 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } - drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); - } else { /* XXX this shouldn't be an error but we can't handle it for now */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c new file mode 100644 index 0000000000..194814e8fb --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -0,0 +1,1122 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström + * Keith Whitwell + * Eric Anholt + * Dave Airlie + */ + +#include +#include +#include +#include +#include +#include + +#include "errno.h" +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "string.h" +#include "imports.h" + +#include "i915_drm.h" + +#include "intel_bufmgr_ttm.h" +#ifdef TTM_API + +#define DBG(...) do { \ + if (bufmgr_ttm->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +/* + * These bits are always specified in each validation + * request. Other bits are not supported at this point + * as it would require a bit of investigation to figure + * out what mask value should be used. + */ +#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ + DRM_BO_FLAG_READ | \ + DRM_BO_FLAG_WRITE | \ + DRM_BO_FLAG_EXE) + +struct intel_validate_entry { + dri_bo *bo; + struct drm_i915_op_arg bo_arg; +}; + +struct dri_ttm_bo_bucket_entry { + drmBO drm_bo; + struct dri_ttm_bo_bucket_entry *next; +}; + +struct dri_ttm_bo_bucket { + struct dri_ttm_bo_bucket_entry *head; + struct dri_ttm_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_TTM_BO_BUCKETS 16 +typedef struct _dri_bufmgr_ttm { + dri_bufmgr bufmgr; + + int fd; + unsigned int fence_type; + unsigned int fence_type_flush; + + uint32_t max_relocs; + + struct intel_validate_entry *validate_array; + int validate_array_size; + int validate_count; + + /** Array of lists of cached drmBOs of power-of-two sizes */ + struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; +} dri_bufmgr_ttm; + +/** + * Private information associated with a relocation that isn't already stored + * in the relocation buffer to be passed to the kernel. + */ +struct dri_ttm_reloc { + dri_bo *target_buf; + uint64_t validate_flags; + /** Offset of target_buf after last execution of this relocation entry. */ + unsigned int last_target_offset; +}; + +typedef struct _dri_bo_ttm { + dri_bo bo; + + int refcount; + unsigned int map_count; + drmBO drm_bo; + const char *name; + + uint64_t last_flags; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** DRM buffer object containing relocation list */ + uint32_t *reloc_buf_data; + struct dri_ttm_reloc *relocs; + + /** + * Indicates that the buffer may be shared with other processes, so we + * can't hold maps beyond when the user does. + */ + GLboolean shared; + + GLboolean delayed_unmap; + /* Virtual address from the dri_bo_map whose unmap was delayed. */ + void *saved_virtual; +} dri_bo_ttm; + +typedef struct _dri_fence_ttm +{ + dri_fence fence; + + int refcount; + const char *name; + drmFence drm_fence; +} dri_fence_ttm; + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_ttm_bo_bucket * +dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_TTM_BO_BUCKETS) + return NULL; + + return &bufmgr_ttm->cache_bucket[i]; +} + + +static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) +{ + int i, j; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + if (bo_ttm->reloc_buf_data != NULL) { + for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { + uint32_t *reloc_entry = bo_ttm->reloc_buf_data + + I915_RELOC_HEADER + + j * I915_RELOC0_STRIDE; + dri_bo *target_bo = bo_ttm->relocs[j].target_buf; + dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; + + DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", + i, + bo_ttm->name, reloc_entry[0], + target_ttm->name, target_bo->offset, + reloc_entry[1]); + } + } else { + DBG("%2d: %s\n", i, bo_ttm->name); + } + } +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +intel_add_validate_buffer(dri_bo *buf, + uint64_t flags) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, + * do that now. + */ + if (ttm_buf->delayed_unmap) { + drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + ttm_buf->delayed_unmap = GL_FALSE; + } + + if (ttm_buf->validate_index == -1) { + struct intel_validate_entry *entry; + struct drm_i915_op_arg *arg; + struct drm_bo_op_req *req; + int index; + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { + int i, new_size = bufmgr_ttm->validate_array_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_ttm->validate_array = + realloc(bufmgr_ttm->validate_array, + sizeof(struct intel_validate_entry) * new_size); + bufmgr_ttm->validate_array_size = new_size; + + /* Update pointers for realloced mem. */ + for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { + bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) + &bufmgr_ttm->validate_array[i + 1].bo_arg; + } + } + + /* Pick out the new array entry for ourselves */ + index = bufmgr_ttm->validate_count; + ttm_buf->validate_index = index; + entry = &bufmgr_ttm->validate_array[index]; + bufmgr_ttm->validate_count++; + + /* Fill in array entry */ + entry->bo = buf; + dri_bo_reference(buf); + + /* Fill in kernel arg */ + arg = &entry->bo_arg; + req = &arg->d.req; + + memset(arg, 0, sizeof(*arg)); + req->bo_req.handle = ttm_buf->drm_bo.handle; + req->op = drm_bo_validate; + req->bo_req.flags = flags; + req->bo_req.hint = 0; +#ifdef DRM_BO_HINT_PRESUMED_OFFSET + /* PRESUMED_OFFSET indicates that all relocations pointing at this + * buffer have the correct offset. If any of our relocations don't, + * this flag will be cleared off the buffer later in the relocation + * processing. + */ + req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; + req->bo_req.presumed_offset = buf->offset; +#endif + req->bo_req.mask = INTEL_BO_MASK; + req->bo_req.fence_class = 0; /* Backwards compat. */ + + if (ttm_buf->reloc_buf_data != NULL) + arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; + else + arg->reloc_ptr = 0; + + /* Hook up the linked list of args for the kernel */ + arg->next = 0; + if (index != 0) { + bufmgr_ttm->validate_array[index - 1].bo_arg.next = + (unsigned long)arg; + } + } else { + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[ttm_buf->validate_index]; + struct drm_i915_op_arg *arg = &entry->bo_arg; + struct drm_bo_op_req *req = &arg->d.req; + uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; + uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; + + /* Buffer was already in the validate list. Extend its flags as + * necessary. + */ + + if (memFlags == 0) { + fprintf(stderr, + "%s: No shared memory types between " + "0x%16llx and 0x%16llx\n", + __FUNCTION__, req->bo_req.flags, flags); + abort(); + } + if (flags & ~INTEL_BO_MASK) { + fprintf(stderr, + "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", + __FUNCTION__, flags & ~INTEL_BO_MASK); + abort(); + } + req->bo_req.flags = memFlags | modeFlags; + } +} + + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static int +intel_setup_reloc_list(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + + bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, + sizeof(struct dri_ttm_reloc)); + bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); + + /* Initialize the relocation list with the header: + * DWORD 0: relocation count + * DWORD 1: relocation type + * DWORD 2+3: handle to next relocation list (currently none) 64-bits + */ + bo_ttm->reloc_buf_data[0] = 0; + bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; + bo_ttm->reloc_buf_data[2] = 0; + bo_ttm->reloc_buf_data[3] = 0; + + return 0; +} + +#if 0 +int +driFenceSignaled(DriFenceObject * fence, unsigned type) +{ + int signaled; + int ret; + + if (fence == NULL) + return GL_TRUE; + + ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); + BM_CKFATAL(ret); + return signaled; +} +#endif + +static dri_bo * +dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + unsigned int pageSize = getpagesize(); + int ret; + uint64_t flags; + unsigned int hint; + unsigned long alloc_size; + struct dri_ttm_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + /* The mask argument doesn't do anything for us that we want other than + * determine which pool (TTM or local) the buffer is allocated into, so + * just pass all of the allocation class flags. + */ + flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_EXE; + /* No hints we want to use. */ + hint = 0; + + /* Round the allocated size up to a power of two number of pages. */ + alloc_size = 1 << logbase2(size); + if (alloc_size < pageSize) + alloc_size = pageSize; + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) + alloc_size = size; + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_ttm_bo_bucket_entry *entry = bucket->head; + int busy; + + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + ttm_buf->drm_bo = entry->drm_bo; + free(entry); + } + } + + if (!alloc_from_cache) { + ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, + NULL, flags, hint, &ttm_buf->drm_bo); + if (ret != 0) { + free(ttm_buf); + return NULL; + } + } + + ttm_buf->bo.size = size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_FALSE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); + + return &ttm_buf->bo; +} + +/* Our TTM backend doesn't allow creation of static buffers, as that requires + * privelege for the non-fake case, and the lock in the fake case where we were + * working around the X Server not creating buffers and passing handles to us. + */ +static dri_bo * +dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) +{ + return NULL; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + int ret; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(-ret)); + free(ttm_buf); + return NULL; + } + ttm_buf->bo.size = ttm_buf->drm_bo.size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_TRUE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create_from_handle: %p %08x (%s)\n", + &ttm_buf->bo, handle, ttm_buf->name); + + return &ttm_buf->bo; +} + +static void +dri_ttm_bo_reference(dri_bo *buf) +{ + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + ttm_buf->refcount++; +} + +static void +dri_ttm_bo_unreference(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + if (!buf) + return; + + if (--ttm_buf->refcount == 0) { + struct dri_ttm_bo_bucket *bucket; + int ret; + + assert(ttm_buf->map_count == 0); + + if (ttm_buf->reloc_buf_data) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) + dri_bo_unreference(ttm_buf->relocs[i].target_buf); + free(ttm_buf->relocs); + + /* Free the kernel BO containing relocation entries */ + free(ttm_buf->reloc_buf_data); + ttm_buf->reloc_buf_data = NULL; + } + + if (ttm_buf->delayed_unmap) { + int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + } + + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (!ttm_buf->shared && + bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_ttm_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->drm_bo = ttm_buf->drm_bo; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed (%s): %s\n", + ttm_buf->name, strerror(-ret)); + } + } + + DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + free(buf); + return; + } +} + +static int +dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + uint64_t flags; + int ret; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + flags = DRM_BO_FLAG_READ; + if (write_enable) + flags |= DRM_BO_FLAG_WRITE; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops. + */ + if (ttm_buf->map_count++ != 0) + return 0; + + assert(buf->virtual == NULL); + + DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + /* XXX: What about if we're upgrading from READ to WRITE? */ + if (ttm_buf->delayed_unmap) { + buf->virtual = ttm_buf->saved_virtual; + return 0; + } + + ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +static int +dri_ttm_bo_unmap(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + int ret; + + if (buf == NULL) + return 0; + + assert(ttm_buf->map_count != 0); + if (--ttm_buf->map_count != 0) + return 0; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + assert(buf->virtual != NULL); + + DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + if (!ttm_buf->shared) { + ttm_buf->saved_virtual = buf->virtual; + ttm_buf->delayed_unmap = GL_TRUE; + buf->virtual = NULL; + + return 0; + } + + buf->virtual = NULL; + + ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_fence * +intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_fence_ttm *ttm_fence; + + ttm_fence = malloc(sizeof(*ttm_fence)); + if (!ttm_fence) + return NULL; + + ttm_fence->drm_fence.handle = arg->handle; + ttm_fence->drm_fence.fence_class = arg->fence_class; + ttm_fence->drm_fence.type = arg->type; + ttm_fence->drm_fence.flags = arg->flags; + ttm_fence->drm_fence.signaled = 0; + ttm_fence->drm_fence.sequence = arg->sequence; + + ttm_fence->fence.bufmgr = bufmgr; + ttm_fence->name = name; + ttm_fence->refcount = 1; + + DBG("fence_create_from_handle: %p (%s)\n", + &ttm_fence->fence, ttm_fence->name); + + return &ttm_fence->fence; +} + + +static void +dri_ttm_fence_reference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + ++fence_ttm->refcount; + DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_ttm_fence_unreference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + if (!fence) + return; + + DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); + + if (--fence_ttm->refcount == 0) { + int ret; + + ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); + if (ret != 0) { + fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", + fence_ttm->name, strerror(-ret)); + } + + free(fence); + return; + } +} + +static void +dri_ttm_fence_wait(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + int ret; + + ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", + __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); + abort(); + } + + DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + free(bufmgr_ttm->validate_array); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; + struct dri_ttm_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_buf. + */ +static int +dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; + dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; + int num_relocs; + uint32_t *this_reloc; + + /* Create a new relocation list if needed */ + if (reloc_buf_ttm->reloc_buf_data == NULL) + intel_setup_reloc_list(reloc_buf); + + num_relocs = reloc_buf_ttm->reloc_buf_data[0]; + + /* Check overflow */ + assert(num_relocs < bufmgr_ttm->max_relocs); + + this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + + num_relocs * I915_RELOC0_STRIDE; + + this_reloc[0] = offset; + this_reloc[1] = delta; + this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ + this_reloc[3] = 0; + + reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; + reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; + dri_bo_reference(target_buf); + + reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ + /* Check wraparound */ + assert(reloc_buf_ttm->reloc_buf_data[0] != 0); + return 0; +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_ttm_bo_process_reloc(dri_bo *bo) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_process_reloc(r->target_buf); + + /* Add the target to the validate list */ + intel_add_validate_buffer(r->target_buf, r->validate_flags); + + /* Clear the PRESUMED_OFFSET flag from the validate list entry of the + * target if this buffer has a stale relocated pointer at it. + */ + if (r->last_target_offset != r->target_buf->offset) { + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; + + entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; + } + } +} + +static void * +dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + + /* Update indices and set up the validate list. */ + dri_ttm_bo_process_reloc(batch_buf); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + intel_add_validate_buffer(batch_buf, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + + *count = bufmgr_ttm->validate_count; + return &bufmgr_ttm->validate_array[0].bo_arg; +} + +static const char * +intel_get_flags_mem_type_string(uint64_t flags) +{ + switch (flags & DRM_BO_MASK_MEM) { + case DRM_BO_FLAG_MEM_LOCAL: return "local"; + case DRM_BO_FLAG_MEM_TT: return "ttm"; + case DRM_BO_FLAG_MEM_VRAM: return "vram"; + case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; + case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; + case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; + case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; + case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; + default: return NULL; + } +} + +static const char * +intel_get_flags_caching_string(uint64_t flags) +{ + switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { + case 0: return "UU"; + case DRM_BO_FLAG_CACHED: return "CU"; + case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; + case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; + default: return NULL; + } +} + +static void +intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) +{ + int i; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; + struct drm_bo_arg_rep *rep = &arg->d.rep; + + /* Update the flags */ + if (rep->bo_info.flags != bo_ttm->last_flags) { + DBG("BO %s migrated: %s/%s -> %s/%s\n", + bo_ttm->name, + intel_get_flags_mem_type_string(bo_ttm->last_flags), + intel_get_flags_caching_string(bo_ttm->last_flags), + intel_get_flags_mem_type_string(rep->bo_info.flags), + intel_get_flags_caching_string(rep->bo_info.flags)); + + bo_ttm->last_flags = rep->bo_info.flags; + } + /* Update the buffer offset */ + if (rep->bo_info.offset != bo->offset) { + DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", + bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); + bo->offset = rep->bo_info.offset; + } + } +} + +/** + * Update the last target offset field of relocation entries for PRESUMED_OFFSET + * computation. + */ +static void +dri_ttm_bo_post_submit(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_post_submit(r->target_buf); + + r->last_target_offset = r->target_buf->offset; + } +} + +static void +dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + int i; + + intel_update_buffer_offsets (bufmgr_ttm); + + dri_ttm_bo_post_submit(batch_buf); + + if (bufmgr_ttm->bufmgr.debug) + dri_ttm_dump_validation_list(bufmgr_ttm); + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + /* Disconnect the buffer from the validate list */ + bo_ttm->validate_index = -1; + dri_bo_unreference(bo); + bufmgr_ttm->validate_array[i].bo = NULL; + } + bufmgr_ttm->validate_count = 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + bufmgr_ttm->cache_bucket[i].max_entries = -1; + } +} + +/* + * + */ +static int +dri_ttm_check_aperture_space(dri_bo *bo) +{ + return 0; +} + +/** + * Initializes the TTM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + * \param fence_type Driver-specific fence type used for fences with no flush. + * \param fence_type_flush Driver-specific fence type used for fences with a + * flush. + */ +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + dri_bufmgr_ttm *bufmgr_ttm; + int i; + + bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); + bufmgr_ttm->fd = fd; + bufmgr_ttm->fence_type = fence_type; + bufmgr_ttm->fence_type_flush = fence_type_flush; + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; + bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; + bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; + bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; + bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; + bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; + bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; + bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; + bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; + bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; + bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; + bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; + bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; + bufmgr_ttm->bufmgr.debug = GL_FALSE; + bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) + bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; + + return &bufmgr_ttm->bufmgr; +} +#else +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + return NULL; +} + +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + return NULL; +} + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ +} +#endif diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h new file mode 100644 index 0000000000..f5bd64c90f --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h @@ -0,0 +1,28 @@ + +#ifndef INTEL_BUFMGR_TTM_H +#define INTEL_BUFMGR_TTM_H + +#include "dri_bufmgr.h" + +extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle); + +#ifdef TTM_API +dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg); +#endif + + +dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size); + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); + +#ifndef TTM_API +#define DRM_I915_FENCE_CLASS_ACCEL 0 +#define DRM_I915_FENCE_TYPE_RW 2 +#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000 +#endif + +#endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 50c1964d87..5fa9d95e11 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,7 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -96,13 +96,11 @@ int INTEL_DEBUG = (0); #include "extension_helper.h" -#define DRIVER_DATE "20080716" -#define DRIVER_DATE_GEM "GEM " DRIVER_DATE +#define DRIVER_DATE "20061102" static const GLubyte * intelGetString(GLcontext * ctx, GLenum name) { - const struct intel_context *const intel = intel_context(ctx); const char *chipset; static char buffer[128]; @@ -112,7 +110,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; case GL_RENDERER: - switch (intel->intelScreen->deviceID) { + switch (intel_context(ctx)->intelScreen->deviceID) { case PCI_CHIP_845_G: chipset = "Intel(R) 845G"; break; @@ -185,9 +183,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; } - (void) driGetRendererString(buffer, chipset, - (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE, - 0); + (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0); return (GLubyte *) buffer; default: @@ -371,34 +367,22 @@ intelFlush(GLcontext * ctx) if (!IS_965(intel->intelScreen->deviceID)) INTEL_FIREVERTICES(intel); - /* Emit a flush so that any frontbuffer rendering that might have occurred - * lands onscreen in a timely manner, even if the X Server doesn't trigger - * a flush for us. - */ - intel_batchbuffer_emit_mi_flush(intel->batch); - if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); + + /* XXX: Need to do an MI_FLUSH here. + */ } void intelFinish(GLcontext * ctx) { - struct gl_framebuffer *fb = ctx->DrawBuffer; - int i; - + struct intel_context *intel = intel_context(ctx); intelFlush(ctx); - - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct intel_renderbuffer *irb; - - irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb->region) - dri_bo_wait_rendering(irb->region->buffer); - } - if (fb->_DepthBuffer) { - /* XXX: Wait on buffer idle */ + if (intel->batch->last_fence) { + dri_fence_wait(intel->batch->last_fence); + dri_fence_unreference(intel->batch->last_fence); + intel->batch->last_fence = NULL; } } @@ -464,32 +448,28 @@ static GLboolean intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; - GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; - int gem_kernel = 0; - GLboolean gem_supported; - struct drm_i915_getparam gp; - - gp.param = I915_PARAM_HAS_GEM; - gp.value = &gem_kernel; + GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; + GLboolean ttm_supported; - (void) drmCommandWriteRead(intel->driFd, DRM_I915_GETPARAM, &gp, sizeof(gp)); - - /* If we've got a new enough DDX that's initializing GEM and giving us + /* If we've got a new enough DDX that's initializing TTM and giving us * object handles for the shared buffers, use that. */ intel->ttm = GL_FALSE; if (intel->intelScreen->driScrnPriv->dri2.enabled) - gem_supported = GL_TRUE; + ttm_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && - gem_kernel && + intel->intelScreen->drmMinor >= 11 && intel->intelScreen->front.bo_handle != -1) - gem_supported = GL_TRUE; + ttm_supported = GL_TRUE; else - gem_supported = GL_FALSE; + ttm_supported = GL_FALSE; - if (!gem_disable && gem_supported) { + if (!ttm_disable && ttm_supported) { int bo_reuse_mode; - intel->bufmgr = intel_bufmgr_gem_init(intel->driFd, + intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, + DRM_FENCE_TYPE_EXE, + DRM_FENCE_TYPE_EXE | + DRM_I915_FENCE_TYPE_RW, BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; @@ -499,16 +479,16 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_bufmgr_gem_enable_reuse(intel->bufmgr); + intel_ttm_enable_bo_reuse(intel->bufmgr); break; } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { - if (gem_disable) { - fprintf(stderr, "GEM disabled. Using classic.\n"); + if (ttm_disable) { + fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); } else { - fprintf(stderr, "Failed to initialize GEM. " + fprintf(stderr, "Failed to initialize TTM buffer manager. " "Falling back to classic.\n"); } @@ -518,17 +498,14 @@ intel_init_bufmgr(struct intel_context *intel) return GL_FALSE; } - intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - intel_fence_emit, - intel_fence_wait, - intel); + intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); } - /* XXX bufmgr should be per-screen, not per-context */ - intelScreen->ttm = intel->ttm; - return GL_TRUE; } @@ -696,6 +673,8 @@ intelInitContext(struct intel_context *intel, intel_recreate_static_regions(intel); intel->batch = intel_batchbuffer_alloc(intel); + intel->last_swap_fence = NULL; + intel->first_swap_fence = NULL; intel_bufferobj_init(intel); intel_fbo_init(intel); @@ -713,6 +692,7 @@ intelInitContext(struct intel_context *intel, /* Force all software fallbacks */ if (driQueryOptionb(&intel->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D rasterization\n"); + FALLBACK(intel, INTEL_FALLBACK_USER, 1); intel->no_rast = 1; } @@ -747,7 +727,17 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); - free(intel->prim.vb); + + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + if (intel->first_swap_fence) { + dri_fence_wait(intel->first_swap_fence); + dri_fence_unreference(intel->first_swap_fence); + intel->first_swap_fence = NULL; + } if (release_texture_heaps) { /* This share group is about to go away, free our private @@ -899,7 +889,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags) */ if (!intel->ttm && sarea->texAge != intel->hHWContext) { sarea->texAge = intel->hHWContext; - intel_bufmgr_fake_contended_lock_take(intel->bufmgr); + dri_bufmgr_fake_contended_lock_take(intel->bufmgr); if (INTEL_DEBUG & DEBUG_BATCH) intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) @@ -1027,7 +1017,6 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) * Nothing should be left in batch outside of LOCK/UNLOCK which references * cliprects. */ - if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS) - intel_batchbuffer_flush(intel->batch); + assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS); } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f9a373cf74..df79ab8897 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -35,7 +35,6 @@ #include "mm.h" #include "texmem.h" #include "dri_bufmgr.h" -#include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" @@ -86,7 +85,6 @@ struct intel_context { void (*destroy) (struct intel_context * intel); void (*emit_state) (struct intel_context * intel); - void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -176,6 +174,9 @@ struct intel_context */ GLboolean ttm; + dri_fence *last_swap_fence; + dri_fence *first_swap_fence; + struct intel_batchbuffer *batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -183,13 +184,9 @@ struct intel_context struct { GLuint id; - uint32_t primitive; /**< Current hardware primitive type */ + GLuint primitive; + GLubyte *start_ptr; void (*flush) (struct intel_context *); - dri_bo *vb_bo; - uint8_t *vb; - unsigned int start_offset; /**< Byte offset of primitive sequence */ - unsigned int current_offset; /**< Byte offset of next vertex */ - unsigned int count; /**< Number of vertices in current primitive */ } prim; GLuint stats_wm; @@ -294,7 +291,6 @@ extern char *__progname; #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define INTEL_FIREVERTICES(intel) \ diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 9c105013c0..a1240639f4 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -183,10 +183,9 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) switch ((data[0] & 0x1fc00000) >> 22) { case 0x50: instr_out(data, hw_offset, 0, - "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis", - (data[0] >> 11) & 1); + (data[0] & (1 << 21)) ? "en" : "dis"); len = (data[0] & 0x000000ff) + 2; if (len != 6) @@ -211,8 +210,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, pitch %d, " "clipping %sabled\n", format, - (short)(data[1] & 0xffff), - data[1] & (1 << 30) ? "en" : "dis"); + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "(%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "(%d,%d)\n", @@ -222,12 +220,9 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x53: instr_out(data, hw_offset, 0, - "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " - "src tile %d, dst tile %d)\n", + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis", - (data[0] >> 15) & 1, - (data[0] >> 11) & 1); + (data[0] & (1 << 21)) ? "en" : "dis"); len = (data[0] & 0x000000ff) + 2; if (len != 8) @@ -252,17 +247,16 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " "clipping %sabled\n", format, - (short)(data[1] & 0xffff), - data[1] & (1 << 30) ? "en" : "dis"); + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "dst (%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "dst (%d,%d)\n", - data[3] & 0xffff, data[3] >> 16); + data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); instr_out(data, hw_offset, 5, "src (%d,%d)\n", data[5] & 0xffff, data[5] >> 16); instr_out(data, hw_offset, 6, "src pitch %d\n", - (short)(data[6] & 0xffff)); + data[6] & 0xffff); instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); return len; } diff --git a/src/mesa/drivers/dri/intel/intel_depthstencil.c b/src/mesa/drivers/dri/intel/intel_depthstencil.c index 70ba68e9e3..90baecd8c2 100644 --- a/src/mesa/drivers/dri/intel/intel_depthstencil.c +++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c @@ -39,7 +39,7 @@ #include "intel_fbo.h" #include "intel_depthstencil.h" #include "intel_regions.h" -#include "intel_span.h" + /** * The GL_EXT_framebuffer_object allows the user to create their own @@ -86,33 +86,68 @@ * */ + + +static void +map_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_map(intel, depthRb->region); + depthRb->pfMap = depthRb->region->map; + depthRb->pfPitch = depthRb->region->pitch; + } + if (stencilRb && stencilRb->region) { + intel_region_map(intel, stencilRb->region); + stencilRb->pfMap = stencilRb->region->map; + stencilRb->pfPitch = stencilRb->region->pitch; + } +} + +static void +unmap_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_unmap(intel, depthRb->region); + depthRb->pfMap = NULL; + depthRb->pfPitch = 0; + } + if (stencilRb && stencilRb->region) { + intel_region_unmap(intel, stencilRb->region); + stencilRb->pfMap = NULL; + stencilRb->pfPitch = 0; + } +} + + + /** * Undo the pairing/interleaving between depth and stencil buffers. * irb should be a depth/stencil or stencil renderbuffer. */ void -intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) +intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) { - struct intel_context *intel = intel_context(ctx); - struct gl_renderbuffer *rb = &irb->Base; - if (irb->PairedStencil) { /* irb is a depth/stencil buffer */ struct gl_renderbuffer *stencilRb; struct intel_renderbuffer *stencilIrb; - ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); stencilIrb = intel_renderbuffer(stencilRb); if (stencilIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(stencilIrb->PairedDepth == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, stencilRb); - _mesa_extract_stencil(ctx, rb, stencilRb); - intel_renderbuffer_unmap(intel, stencilRb); - intel_renderbuffer_unmap(intel, rb); + ASSERT(stencilIrb->PairedDepth == irb->Base.Name); + map_regions(ctx, irb, stencilIrb); + _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base); + unmap_regions(ctx, irb, stencilIrb); stencilIrb->PairedDepth = 0; } irb->PairedStencil = 0; @@ -122,19 +157,17 @@ intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) struct gl_renderbuffer *depthRb; struct intel_renderbuffer *depthIrb; - ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT || - rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || + irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); depthIrb = intel_renderbuffer(depthRb); if (depthIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(depthIrb->PairedStencil == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, depthRb); - _mesa_extract_stencil(ctx, depthRb, rb); - intel_renderbuffer_unmap(intel, depthRb); - intel_renderbuffer_unmap(intel, rb); + ASSERT(depthIrb->PairedStencil == irb->Base.Name); + map_regions(ctx, depthIrb, irb); + _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base); + unmap_regions(ctx, depthIrb, irb); depthIrb->PairedStencil = 0; } irb->PairedDepth = 0; @@ -161,7 +194,6 @@ void intel_validate_paired_depth_stencil(GLcontext * ctx, struct gl_framebuffer *fb) { - struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *depthRb, *stencilRb; depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); @@ -198,11 +230,9 @@ intel_validate_paired_depth_stencil(GLcontext * ctx, stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); /* establish new pairing: interleave stencil into depth buffer */ - intel_renderbuffer_map(intel, &depthRb->Base); - intel_renderbuffer_map(intel, &stencilRb->Base); + map_regions(ctx, depthRb, stencilRb); _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &depthRb->Base); + unmap_regions(ctx, depthRb, stencilRb); depthRb->PairedStencil = stencilRb->Base.Name; stencilRb->PairedDepth = depthRb->Base.Name; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 5bd2ebfdcf..b3f6610546 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -153,9 +153,6 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) intel_unpair_depth_stencil(ctx, irb); } - if (irb->span_cache != NULL) - _mesa_free(irb->span_cache); - if (intel && irb->region) { intel_region_release(&irb->region); } @@ -212,14 +209,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10: case GL_RGB12: case GL_RGB16: - rb->_ActualFormat = GL_RGB8; - rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 0; - cpp = 4; - break; case GL_RGBA: case GL_RGBA2: case GL_RGBA4: @@ -305,6 +294,9 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Width = width; rb->Height = height; + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); + return GL_TRUE; } } @@ -374,6 +366,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, intel_region_reference(&rb->region, region); intel_region_release(&old); + rb->pfMap = region->map; rb->pfPitch = region->pitch; } @@ -453,6 +446,8 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); return irb; } @@ -524,7 +519,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage) { if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; @@ -563,6 +558,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; + intel_set_span_functions(&irb->Base); irb->RenderToTexture = GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 9d15582d78..c90c84b48c 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -28,9 +28,9 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H -#include "intel_screen.h" struct intel_context; +struct intel_region; /** * Intel framebuffer, derived from gl_framebuffer. @@ -70,6 +70,7 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; + void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ GLboolean RenderToTexture; /* RTT? */ @@ -79,9 +80,6 @@ struct intel_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ - - uint8_t *span_cache; - unsigned long span_cache_offset; }; extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 58c81766cd..f4566ba89c 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include "mtypes.h" #include "context.h" @@ -45,7 +43,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL @@ -106,7 +104,7 @@ intelWaitIrq(struct intel_context *intel, int seq) } -int +void intel_batch_ioctl(struct intel_context *intel, GLuint start_offset, GLuint used, @@ -115,7 +113,7 @@ intel_batch_ioctl(struct intel_context *intel, struct drm_i915_batchbuffer batch; if (intel->no_hw) - return 0; + return; assert(intel->locked); assert(used); @@ -144,42 +142,82 @@ intel_batch_ioctl(struct intel_context *intel, if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); - return -errno; + UNLOCK_HARDWARE(intel); + exit(1); } - - return 0; } -int +#ifdef TTM_API +void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf) + void *start, GLuint count, dri_fence **fence) { + struct drm_i915_execbuffer execbuf; + dri_fence *fo; int ret; assert(intel->locked); assert(used); if (intel->no_hw) - return 0; + return; + + if (*fence) { + dri_fence_unreference(*fence); + } - execbuf->batch_start_offset = 0; - execbuf->batch_len = used; - execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; - execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf->DR1 = 0; - execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); + memset(&execbuf, 0, sizeof(execbuf)); + + execbuf.num_buffers = count; + execbuf.batch.used = used; + execbuf.batch.cliprects = intel->pClipRects; + execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf.batch.DR1 = 0; + execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); + + execbuf.ops_list = (unsigned long)start; // TODO + execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; do { - ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf); + ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, + sizeof(execbuf)); } while (ret == -EAGAIN); if (ret != 0) { - fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); - return -errno; + fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); + UNLOCK_HARDWARE(intel); + exit(1); } - return 0; + if (execbuf.fence_arg.error != 0) { + + /* + * Fence creation has failed, but the GPU has been + * idled by the kernel. Safe to continue. + */ + + *fence = NULL; + return; + } + + fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", + &execbuf.fence_arg); + if (!fo) { + fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); + UNLOCK_HARDWARE(intel); + exit(1); + } + *fence = fo; +} +#else +void +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence) +{ } +#endif diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 526e38358c..8674aef723 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -33,14 +33,14 @@ void intelWaitIrq( struct intel_context *intel, int seq ); int intelEmitIrqLocked( struct intel_context *intel ); -int intel_batch_ioctl(struct intel_context *intel, - GLuint start_offset, +void intel_batch_ioctl( struct intel_context *intel, + GLuint start_offset, + GLuint used, + GLboolean ignore_cliprects, + GLboolean allow_unlock ); +void intel_exec_ioctl(struct intel_context *intel, GLuint used, - GLboolean ignore_cliprects, - GLboolean allow_unlock); -int intel_exec_ioctl(struct intel_context *intel, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf); + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 7e0d20e681..81238acfe4 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -43,7 +43,7 @@ #include "intel_buffer_objects.h" #include "intel_buffers.h" #include "intel_pixel.h" -#include "intel_reg.h" + #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -293,7 +293,7 @@ do_blit_bitmap( GLcontext *ctx, dst->pitch, dst->buffer, 0, - dst->tiling, + dst->tiled, rect.x1 + px, rect.y2 - (py + h), w, h, @@ -301,8 +301,9 @@ do_blit_bitmap( GLcontext *ctx, } } } + out: + intel_batchbuffer_flush(intel->batch); } -out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 3093ccf7c6..45f72bac52 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -229,7 +229,7 @@ do_texture_copypixels(GLcontext * ctx, out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -337,16 +337,18 @@ do_blit_copypixels(GLcontext * ctx, continue; intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiling, - dst->pitch, dst->buffer, 0, dst->tiling, + src->pitch, src->buffer, 0, src->tiled, + dst->pitch, dst->buffer, 0, dst->tiled, clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ clip_x, clip_y, /* dstx, dsty */ clip_w, clip_h, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } + + out: + intel_batchbuffer_flush(intel->batch); } -out: UNLOCK_HARDWARE(intel); DBG("%s: success\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 5675084da5..34813d2aa0 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -81,8 +81,7 @@ do_texture_drawpixels(GLcontext * ctx, else { /* PBO only for now: */ - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - not PBO\n", __FUNCTION__); +/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ return GL_FALSE; } @@ -181,7 +180,7 @@ do_texture_drawpixels(GLcontext * ctx, srcx, srcx + width, srcy + height, srcy); out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); return GL_TRUE; @@ -219,6 +218,7 @@ do_blit_drawpixels(GLcontext * ctx, struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; + dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -314,7 +314,7 @@ do_blit_drawpixels(GLcontext * ctx, intelEmitCopyBlit(intel, dest->cpp, rowLength, src_buffer, src_offset, GL_FALSE, - dest->pitch, dest->buffer, 0, dest->tiling, + dest->pitch, dest->buffer, 0, dest->tiled, rect.x1 - dest_rect.x1, rect.y2 - dest_rect.y2, rect.x1, @@ -322,9 +322,17 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } + intel_batchbuffer_flush(intel->batch); + fence = intel->batch->last_fence; + dri_fence_reference(fence); } UNLOCK_HARDWARE(intel); + if (fence) { + dri_fence_wait(fence); + dri_fence_unreference(fence); + } + if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 96af7e1a03..37629c07e2 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -31,140 +31,11 @@ #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) -#define MI_FLUSH (CMD_MI | (4 << 23)) -#define FLUSH_MAP_CACHE (1 << 0) -#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) - /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) #define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) -/* p189 */ -#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) -#define I1_LOAD_S(n) (1<<(4+n)) - -/** @{ - * 915 definitions - */ -#define S0_VB_OFFSET_MASK 0xffffffc -#define S0_AUTO_CACHE_INV_DISABLE (1<<0) -/** @} */ - -/** @{ - * 830 definitions - */ -#define S0_VB_OFFSET_MASK_830 0xffffff8 -#define S0_VB_PITCH_SHIFT_830 1 -#define S0_VB_ENABLE_830 0 -/** @} */ - -#define S1_VERTEX_WIDTH_SHIFT 24 -#define S1_VERTEX_WIDTH_MASK (0x3f<<24) -#define S1_VERTEX_PITCH_SHIFT 16 -#define S1_VERTEX_PITCH_MASK (0x3f<<16) - -#define TEXCOORDFMT_2D 0x0 -#define TEXCOORDFMT_3D 0x1 -#define TEXCOORDFMT_4D 0x2 -#define TEXCOORDFMT_1D 0x3 -#define TEXCOORDFMT_2D_16 0x4 -#define TEXCOORDFMT_4D_16 0x5 -#define TEXCOORDFMT_NOT_PRESENT 0xf -#define S2_TEXCOORD_FMT0_MASK 0xf -#define S2_TEXCOORD_FMT1_SHIFT 4 -#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) -#define S2_TEXCOORD_NONE (~0) -#define S2_TEX_COUNT_SHIFT_830 12 -#define S2_VERTEX_0_WIDTH_SHIFT_830 0 -#define S2_VERTEX_1_WIDTH_SHIFT_830 6 -/* S3 not interesting */ - -#define S4_POINT_WIDTH_SHIFT 23 -#define S4_POINT_WIDTH_MASK (0x1ff<<23) -#define S4_LINE_WIDTH_SHIFT 19 -#define S4_LINE_WIDTH_ONE (0x2<<19) -#define S4_LINE_WIDTH_MASK (0xf<<19) -#define S4_FLATSHADE_ALPHA (1<<18) -#define S4_FLATSHADE_FOG (1<<17) -#define S4_FLATSHADE_SPECULAR (1<<16) -#define S4_FLATSHADE_COLOR (1<<15) -#define S4_CULLMODE_BOTH (0<<13) -#define S4_CULLMODE_NONE (1<<13) -#define S4_CULLMODE_CW (2<<13) -#define S4_CULLMODE_CCW (3<<13) -#define S4_CULLMODE_MASK (3<<13) -#define S4_VFMT_POINT_WIDTH (1<<12) -#define S4_VFMT_SPEC_FOG (1<<11) -#define S4_VFMT_COLOR (1<<10) -#define S4_VFMT_DEPTH_OFFSET (1<<9) -#define S4_VFMT_XYZ (1<<6) -#define S4_VFMT_XYZW (2<<6) -#define S4_VFMT_XY (3<<6) -#define S4_VFMT_XYW (4<<6) -#define S4_VFMT_XYZW_MASK (7<<6) -#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) -#define S4_FORCE_DEFAULT_SPECULAR (1<<4) -#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) -#define S4_VFMT_FOG_PARAM (1<<2) -#define S4_SPRITE_POINT_ENABLE (1<<1) -#define S4_LINE_ANTIALIAS_ENABLE (1<<0) - -#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ - S4_VFMT_SPEC_FOG | \ - S4_VFMT_COLOR | \ - S4_VFMT_DEPTH_OFFSET | \ - S4_VFMT_XYZW_MASK | \ - S4_VFMT_FOG_PARAM) - - -#define S5_WRITEDISABLE_ALPHA (1<<31) -#define S5_WRITEDISABLE_RED (1<<30) -#define S5_WRITEDISABLE_GREEN (1<<29) -#define S5_WRITEDISABLE_BLUE (1<<28) -#define S5_WRITEDISABLE_MASK (0xf<<28) -#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) -#define S5_LAST_PIXEL_ENABLE (1<<26) -#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) -#define S5_FOG_ENABLE (1<<24) -#define S5_STENCIL_REF_SHIFT 16 -#define S5_STENCIL_REF_MASK (0xff<<16) -#define S5_STENCIL_TEST_FUNC_SHIFT 13 -#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) -#define S5_STENCIL_FAIL_SHIFT 10 -#define S5_STENCIL_FAIL_MASK (0x7<<10) -#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 -#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) -#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 -#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) -#define S5_STENCIL_WRITE_ENABLE (1<<3) -#define S5_STENCIL_TEST_ENABLE (1<<2) -#define S5_COLOR_DITHER_ENABLE (1<<1) -#define S5_LOGICOP_ENABLE (1<<0) - - -#define S6_ALPHA_TEST_ENABLE (1<<31) -#define S6_ALPHA_TEST_FUNC_SHIFT 28 -#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) -#define S6_ALPHA_REF_SHIFT 20 -#define S6_ALPHA_REF_MASK (0xff<<20) -#define S6_DEPTH_TEST_ENABLE (1<<19) -#define S6_DEPTH_TEST_FUNC_SHIFT 16 -#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) -#define S6_CBUF_BLEND_ENABLE (1<<15) -#define S6_CBUF_BLEND_FUNC_SHIFT 12 -#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) -#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 -#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) -#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 -#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) -#define S6_DEPTH_WRITE_ENABLE (1<<3) -#define S6_COLOR_WRITE_ENABLE (1<<2) -#define S6_TRISTRIP_PV_SHIFT 0 -#define S6_TRISTRIP_PV_MASK (0x3<<0) - -#define S7_DEPTH_OFFSET_CONST_MASK ~0 - /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index ddfdce3835..35ab46afea 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -39,17 +39,13 @@ * last moment. */ -#include -#include - #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" #include "intel_batchbuffer.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -80,34 +76,10 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } } -static int -intel_set_region_tiling_gem(struct intel_context *intel, - struct intel_region *region, - uint32_t bo_handle) -{ - struct drm_i915_gem_get_tiling get_tiling; - int ret; - - memset(&get_tiling, 0, sizeof(get_tiling)); - - get_tiling.handle = bo_handle; - ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) { - fprintf(stderr, "Failed to get tiling state for region: %s\n", - strerror(errno)); - return ret; - } - - region->tiling = get_tiling.tiling_mode; - region->bit_6_swizzle = get_tiling.swizzle_mode; - - return 0; -} - static struct intel_region * intel_region_alloc_internal(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - dri_bo *buffer) + GLuint tiled, dri_bo *buffer) { struct intel_region *region; @@ -121,12 +93,9 @@ intel_region_alloc_internal(struct intel_context *intel, region->pitch = pitch; region->height = height; /* needed? */ region->refcount = 1; + region->tiled = tiled; region->buffer = buffer; - /* Default to no tiling */ - region->tiling = I915_TILING_NONE; - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; - return region; } @@ -137,28 +106,25 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64); + pitch * cpp * height, 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); - return intel_region_alloc_internal(intel, cpp, pitch, height, buffer); + return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); } struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint handle) + GLuint tiled, GLuint handle) { - struct intel_region *region; dri_bo *buffer; - buffer = intel_bo_gem_create_from_name(intel->bufmgr, "dri2 region", handle); + buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); - region = intel_region_alloc_internal(intel, cpp, pitch, height, buffer); - if (region == NULL) - return region; - - intel_set_region_tiling_gem(intel, region, handle); - - return region; + return intel_region_alloc_internal(intel, + cpp, pitch, height, tiled, buffer); } void @@ -172,34 +138,26 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) } void -intel_region_release(struct intel_region **region_handle) +intel_region_release(struct intel_region **region) { - struct intel_region *region = *region_handle; - - if (region == NULL) + if (!*region) return; - DBG("%s %d\n", __FUNCTION__, region->refcount - 1); - - ASSERT(region->refcount > 0); - region->refcount--; + DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1); - if (region->refcount == 0) { - assert(region->map_refcount == 0); + ASSERT((*region)->refcount > 0); + (*region)->refcount--; - if (region->pbo) - region->pbo->region = NULL; - region->pbo = NULL; - dri_bo_unreference(region->buffer); - - if (region->classic_map != NULL) { - drmUnmap(region->classic_map, - region->pitch * region->cpp * region->height); - } + if ((*region)->refcount == 0) { + assert((*region)->map_refcount == 0); - free(region); + if ((*region)->pbo) + (*region)->pbo->region = NULL; + (*region)->pbo = NULL; + dri_bo_unreference((*region)->buffer); + free(*region); } - *region_handle = NULL; + *region = NULL; } /* @@ -314,8 +272,8 @@ intel_region_copy(struct intel_context *intel, intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, src_offset, src->tiling, - dst->pitch, dst->buffer, dst_offset, dst->tiling, + src->pitch, src->buffer, src_offset, src->tiled, + dst->pitch, dst->buffer, dst_offset, dst->tiled, srcx, srcy, dstx, dsty, width, height, GL_COPY); } @@ -345,7 +303,7 @@ intel_region_fill(struct intel_context *intel, intelEmitFillBlit(intel, dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiled, dstx, dsty, width, height, color); } @@ -397,7 +355,10 @@ intel_region_release_pbo(struct intel_context *intel, region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, - 64); + 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); } /* Break the COW tie to the pbo. Both the pbo and the region end up @@ -421,19 +382,23 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) /* Now blit from the texture buffer to the new buffer: */ + intel_batchbuffer_flush(intel->batch); + was_locked = intel->locked; - if (!was_locked) + if (intel->locked) LOCK_HARDWARE(intel); intelEmitCopyBlit(intel, region->cpp, - region->pitch, region->buffer, 0, region->tiling, - region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiled, + region->pitch, pbo->buffer, 0, region->tiled, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); - if (!was_locked) + intel_batchbuffer_flush(intel->batch); + + if (was_locked) UNLOCK_HARDWARE(intel); } @@ -458,7 +423,6 @@ intel_recreate_static(struct intel_context *intel, intelRegion *region_desc) { intelScreenPrivate *intelScreen = intel->intelScreen; - int ret; if (region == NULL) { region = calloc(sizeof(*region), 1); @@ -471,45 +435,21 @@ intel_recreate_static(struct intel_context *intel, region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; region->height = intelScreen->height; /* needed? */ + region->tiled = region_desc->tiled; if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, - name, - region_desc->bo_handle); - - intel_set_region_tiling_gem(intel, region, region_desc->bo_handle); + region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, + name, + region_desc->bo_handle); } else { - ret = drmMap(intel->driFd, region_desc->handle, - region->pitch * region->cpp * region->height, - ®ion->classic_map); - if (ret != 0) { - fprintf(stderr, "Failed to drmMap %s buffer\n", name); - free(region); - return NULL; - } - - region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, - name, - region_desc->offset, - region->pitch * region->cpp * - region->height, - region->classic_map); - - /* The sarea just gives us a boolean for whether it's tiled or not, - * instead of which tiling mode it is. Guess. - */ - if (region_desc->tiled) { - if (IS_965(intel->intelScreen->deviceID) && - region_desc == &intelScreen->depth) - region->tiling = I915_TILING_Y; - else - region->tiling = I915_TILING_X; - } else { - region->tiling = I915_TILING_NONE; - } - - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + region->buffer = dri_bo_alloc_static(intel->bufmgr, + name, + region_desc->offset, + intelScreen->pitch * + intelScreen->height, + region_desc->map, + DRM_BO_FLAG_MEM_TT); } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index e5f19fbb45..229f79aeba 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -28,12 +28,6 @@ #ifndef INTEL_REGIONS_H #define INTEL_REGIONS_H -/** @file intel_regions.h - * - * Structure definitions and prototypes for intel_region handling, which is - * the basic structure for rectangular collections of pixels stored in a dri_bo. - */ - #include "mtypes.h" #include "dri_bufmgr.h" @@ -59,9 +53,8 @@ struct intel_region GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ - uint32_t tiling; /**< Which tiling mode the region is in */ - uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ - drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ + GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */ + struct intel_buffer_object *pbo; /* zero-copy uploads */ }; @@ -76,7 +69,7 @@ struct intel_region *intel_region_alloc(struct intel_context *intel, struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - unsigned int handle); + GLuint tiled, unsigned int handle); void intel_region_reference(struct intel_region **dst, struct intel_region *src); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 9ed89906d5..5dded4b167 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr.h" +#include "intel_bufmgr_ttm.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] = /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL */ - DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") DRI_CONF_ENUM(0, "Disable buffer object reuse") DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") @@ -90,6 +90,51 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) { intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + if (intelScreen->front.handle) { + if (drmMap(sPriv->fd, + intelScreen->front.handle, + intelScreen->front.size, + (drmAddress *) & intelScreen->front.map) != 0) { + _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); + return GL_FALSE; + } + } + else { + _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); + } + + if (0) + _mesa_printf("Back 0x%08x ", intelScreen->back.handle); + if (drmMap(sPriv->fd, + intelScreen->back.handle, + intelScreen->back.size, + (drmAddress *) & intelScreen->back.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (intelScreen->third.handle) { + if (0) + _mesa_printf("Third 0x%08x ", intelScreen->third.handle); + if (drmMap(sPriv->fd, + intelScreen->third.handle, + intelScreen->third.size, + (drmAddress *) & intelScreen->third.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + } + + if (0) + _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle); + if (drmMap(sPriv->fd, + intelScreen->depth.handle, + intelScreen->depth.size, + (drmAddress *) & intelScreen->depth.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + if (0) _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); if (intelScreen->tex.size != 0) { @@ -102,15 +147,50 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) } } + if (0) + printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n", + intelScreen->front.map, + intelScreen->back.map, intelScreen->third.map, + intelScreen->depth.map, intelScreen->tex.map); return GL_TRUE; } void intelUnmapScreenRegions(intelScreenPrivate * intelScreen) { +#define REALLY_UNMAP 1 + if (intelScreen->front.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) + printf("drmUnmap front failed!\n"); +#endif + intelScreen->front.map = NULL; + } + if (intelScreen->back.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) + printf("drmUnmap back failed!\n"); +#endif + intelScreen->back.map = NULL; + } + if (intelScreen->third.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0) + printf("drmUnmap third failed!\n"); +#endif + intelScreen->third.map = NULL; + } + if (intelScreen->depth.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->depth.map, intelScreen->depth.size); + intelScreen->depth.map = NULL; +#endif + } if (intelScreen->tex.map) { +#if REALLY_UNMAP drmUnmap(intelScreen->tex.map, intelScreen->tex.size); intelScreen->tex.map = NULL; +#endif } } @@ -141,16 +221,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea) sarea->height); fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle, sarea->front_tiled); + (unsigned) sarea->front_handle); fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle, sarea->back_tiled); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", + (unsigned) sarea->back_handle); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle, sarea->depth_tiled); + (unsigned) sarea->depth_handle); fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); } @@ -254,6 +334,8 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, * attached. */ } +#define BUFFER_FLAG_TILED 0x0100 + /** * DRI2 entrypoint */ @@ -266,6 +348,7 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, struct intel_renderbuffer *rb; struct intel_region *region; struct intel_context *intel = pcp->driverPrivate; + GLuint tiled; switch (ba->buffer.attachment) { case DRI_DRAWABLE_BUFFER_FRONT_LEFT: @@ -299,9 +382,10 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, return; #endif + tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0; region = intel_region_alloc_for_handle(intel, ba->buffer.cpp, ba->buffer.pitch / ba->buffer.cpp, - dPriv->h, + dPriv->h, tiled, ba->buffer.handle); intel_renderbuffer_set_region(rb, region); @@ -446,13 +530,14 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); /* setup the hardware-based renderbuffers */ - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, - &intel_fb->color_rb[0]->Base); + { + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, + &intel_fb->color_rb[0]->Base); + } if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); - + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); @@ -484,7 +569,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 9a73b13951..e62b2d7c89 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -74,8 +74,6 @@ typedef struct int irq_active; int allow_batchbuffer; - int ttm; - /** * Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index edede3a74b..742b1b8735 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,224 +39,6 @@ #include "swrast/swrast.h" -static void -intel_set_span_functions(struct intel_context *intel, - struct gl_renderbuffer *rb); - -#define SPAN_CACHE_SIZE 4096 - -static void -get_span_cache(struct intel_renderbuffer *irb, uint32_t offset) -{ - if (irb->span_cache == NULL) { - irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); - irb->span_cache_offset = -1; - } - - if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) { - irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1); - dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset, - SPAN_CACHE_SIZE, irb->span_cache); - } -} - -static void -clear_span_cache(struct intel_renderbuffer *irb) -{ - irb->span_cache_offset = -1; -} - -static uint32_t -pread_32(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint32_t -pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) | - 0xff000000; -} - -static uint16_t -pread_16(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint8_t -pread_8(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static void -pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 4, &val); -} - -static void -pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 3, &val); -} - -static void -pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 2, &val); -} - -static void -pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 1, &val); -} - -static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - x += intel->drawX; - y += intel->drawY; - - return (y * irb->region->pitch + x) * irb->region->cpp; -} - -/* - * Deal with tiled surfaces - */ - -static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - tile_stride = (irb->pfPitch * irb->region->cpp) << 3; - - x += intel->drawX; - y += intel->drawY; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x1ff; - y_tile_off = y & 7; - - x_tile_number = xbyte >> 9; - y_tile_number = y >> 3; - - tile_off = (y_tile_off << 9) + x_tile_off; - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - -#if 0 - printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", - x, y, tile_off, tile_base, - tile_off + tile_base, - irb->pfPitch, tile_stride); -#endif - - return tile_base + tile_off; -} - -static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - tile_stride = (irb->pfPitch * irb->region->cpp) << 5; - - x += intel->drawX; - y += intel->drawY; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x7f; - y_tile_off = y & 0x1f; - - x_tile_number = xbyte >> 7; - y_tile_number = y >> 5; - - tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + - (x_tile_off & 0xf); - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - - return tile_base + tile_off; -} - /* break intelWriteRGBASpan_ARGB8888 */ @@ -269,7 +51,10 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + GLubyte *buf = (GLubyte *) irb->pfMap \ + + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ + assert(irb->pfMap);\ (void) p; /* XXX FBO: this is identical to the macro in spantmp2.h except we get @@ -284,14 +69,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ int maxy = intel->pClipRects[_nc].y2 - intel->drawY; - -#if 0 - }} -#endif + + + #define Y_FLIP(_y) ((_y) * yScale + yBias) -/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -303,8 +86,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_RGB565 #define TAG2(x,y) intel##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V) +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -314,89 +96,17 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_ARGB8888 #define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit, xRGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_xRGB8888 -#define TAG2(x,y) intel##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 16 bit RGB565 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_XTile_##x##_RGB565 -#define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_YTile_##x##_RGB565 -#define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit ARGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_ARGB8888 -#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_ARGB8888 -#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -/* 32 bit xRGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_xRGB8888 -#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_xRGB8888 -#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V) +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ + (intel->drawY * pitch + intel->drawX) * irb->region->cpp; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -405,34 +115,15 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, ** 16-bit depthbuffer functions. **/ #define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" +#define WRITE_DEPTH( _x, _y, d ) \ + ((GLushort *)buf)[(_x) + (_y) * pitch] = d; -/** - ** 16-bit x tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel_XTile_##x##_z16 -#include "depthtmp.h" +#define READ_DEPTH( d, _x, _y ) \ + d = ((GLushort *)buf)[(_x) + (_y) * pitch]; -/** - ** 16-bit y tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y)) -#define TAG(x) intel_YTile_##x##_z16 + +#define TAG(x) intel##x##_z16 #include "depthtmp.h" @@ -445,129 +136,38 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define VALUE_TYPE GLuint /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \ } -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - - -/** - ** 24/8-bit x-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) \ - /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ d = (tmp << 8) | (tmp >> 24); \ } -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "depthtmp.h" - -/** - ** 24/8-bit y-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_YTile_##x##_z24_s8 +#define TAG(x) intel##x##_z24_s8 #include "depthtmp.h" /** ** 8-bit stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d) +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \ +} -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3); +#define READ_STENCIL( d, _x, _y ) \ + d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24; #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" -/** - ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3); - -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "stenciltmp.h" - -/** - ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d) - -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3) - -#define TAG(x) intel_YTile_##x##_z24_s8 -#include "stenciltmp.h" - -void -intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - if (irb == NULL || irb->region == NULL) - return; - - irb->pfPitch = irb->region->pitch; - - intel_set_span_functions(intel, rb); -} - -void -intel_renderbuffer_unmap(struct intel_context *intel, - struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - if (irb == NULL || irb->region == NULL) - return; - - clear_span_cache(irb); - irb->pfPitch = 0; - - rb->GetRow = NULL; - rb->PutRow = NULL; -} /** * Map or unmap all the renderbuffers which we may need during @@ -586,13 +186,23 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) { GLcontext *ctx = &intel->ctx; GLuint i, j; + struct intel_renderbuffer *irb; /* color draw buffers */ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { - if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); - else - intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; + irb = intel_renderbuffer(rb); + if (irb) { + /* this is a user-created intel_renderbuffer */ + if (irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } /* check for render to textures */ @@ -615,28 +225,77 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } /* color read buffers */ - if (map) - intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer); - else - intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); + irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + if (irb && irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + + /* Account for front/back color page flipping. + * The span routines use the pfMap and pfPitch fields which will + * swap the front/back region map/pitch if we're page flipped. + * Do this after mapping, above, so the map field is valid. + */ +#if 0 + if (map && ctx->DrawBuffer->Name == 0) { + struct intel_renderbuffer *irbFront + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT); + struct intel_renderbuffer *irbBack + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT); + if (irbBack) { + /* double buffered */ + if (intel->sarea->pf_current_page == 0) { + irbFront->pfMap = irbFront->region->map; + irbFront->pfPitch = irbFront->region->pitch; + irbBack->pfMap = irbBack->region->map; + irbBack->pfPitch = irbBack->region->pitch; + } + else { + irbFront->pfMap = irbBack->region->map; + irbFront->pfPitch = irbBack->region->pitch; + irbBack->pfMap = irbFront->region->map; + irbBack->pfPitch = irbFront->region->pitch; + } + } + } +#endif /* depth buffer (Note wrapper!) */ if (ctx->DrawBuffer->_DepthBuffer) { - if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_DepthBuffer->Wrapped); + irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } /* stencil buffer (Note wrapper!) */ if (ctx->DrawBuffer->_StencilBuffer) { - if (map) - intel_renderbuffer_map(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); - else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } } } @@ -654,9 +313,18 @@ intelSpanRenderStart(GLcontext * ctx) struct intel_context *intel = intel_context(ctx); GLuint i; - intelFlush(&intel->ctx); + intelFinish(&intel->ctx); LOCK_HARDWARE(intel); +#if 0 + /* Just map the framebuffer and all textures. Bufmgr code will + * take care of waiting on the necessary fences: + */ + intel_region_map(intel, intel->front_region); + intel_region_map(intel, intel->back_region); + intel_region_map(intel, intel->depth_region); +#endif + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -679,6 +347,14 @@ intelSpanRenderFinish(GLcontext * ctx) _swrast_flush(ctx); + /* Now unmap the framebuffer: + */ +#if 0 + intel_region_unmap(intel, intel->front_region); + intel_region_unmap(intel, intel->back_region); + intel_region_unmap(intel, intel->depth_region); +#endif + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -705,108 +381,26 @@ intelInitSpanFuncs(GLcontext * ctx) * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. */ -static void -intel_set_span_functions(struct intel_context *intel, - struct gl_renderbuffer *rb) +void +intel_set_span_functions(struct gl_renderbuffer *rb) { - struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - uint32_t tiling; - - /* If in GEM mode, we need to do the tile address swizzling ourselves, - * instead of the fence registers handling it. - */ - if (intel->ttm) - tiling = irb->region->tiling; - else - tiling = I915_TILING_NONE; - if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_RGB565(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_RGB565(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_RGB565(rb); - break; - } - } - else if (rb->_ActualFormat == GL_RGB8) { - /* 8888 RGBx */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_xRGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); - break; - } + intelInitPointers_RGB565(rb); } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); - break; - } + intelInitPointers_ARGB8888(rb); } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z16(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z16(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z16(rb); - break; - } + intelInitDepthPointers_z16(rb); } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); - break; - } + intelInitDepthPointers_z24_s8(rb); } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; - } + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ + intelInitStencilPointers_z24_s8(rb); } else { _mesa_problem(NULL, diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index acbeb4abe1..5201f6d6c6 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -32,9 +32,7 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -void intel_renderbuffer_map(struct intel_context *intel, - struct gl_renderbuffer *rb); -void intel_renderbuffer_unmap(struct intel_context *intel, - struct gl_renderbuffer *rb); + +extern void intel_set_span_functions(struct gl_renderbuffer *rb); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index d0ab464a1c..1add7c6188 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -144,13 +144,15 @@ do_copy_texsubimage(struct intel_context *intel, -src->pitch, src->buffer, src->height * src->pitch * src->cpp, - src->tiling, + GL_FALSE, intelImage->mt->pitch, intelImage->mt->region->buffer, image_offset, - intelImage->mt->region->tiling, + intelImage->mt->region->tiled, x, y + height, dstx, dsty, width, height, GL_COPY); /* ? */ + + intel_batchbuffer_flush(intel->batch); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index c11687a2cf..f261034c18 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -238,6 +238,8 @@ try_pbo_upload(struct intel_context *intel, dst_stride, dst_buffer, dst_offset, GL_FALSE, 0, 0, 0, 0, width, height, GL_COPY); + + intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -398,26 +400,11 @@ intelTexImage(GLcontext * ctx, intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); - } else if (intelImage->base.Border == 0) { - int comp_byte = 0; - - if (intelImage->base.IsCompressed) { - comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); - } - - /* Didn't fit in the object miptree, but it's suitable for inclusion in - * a miptree, so create one just for our level and store it in the image. - * It'll get moved into the object miptree at validate time. - */ - intelImage->mt = intel_miptree_create(intel, target, internalFormat, - level, level, - width, height, depth, - intelImage->base.TexFormat->TexelBytes, - comp_byte); - } + if (!intelImage->mt) + DBG("XXX: Image did not fit into tree - storing in local memory!\n"); + /* PBO fastpaths: */ if (dims <= 2 && diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 5763f4ae1f..1b3aa89c05 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -125,10 +125,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) struct intel_texture_object *intelObj = intel_texture_object(tObj); int comp_byte = 0; int cpp; + GLuint face, i; GLuint nr_faces = 0; struct intel_texture_image *firstImage; + GLboolean need_flush = GL_FALSE; + /* We know/require this is true by now: */ assert(intelObj->base._Complete); @@ -224,10 +227,21 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ if (intelObj->mt != intelImage->mt) { copy_image_data_to_tree(intel, intelObj, intelImage); + need_flush = GL_TRUE; } } } +#ifdef I915 + /* XXX: what is this flush about? + * On 965, it causes a batch flush in the middle of the state relocation + * emits, which means that the eventual rendering doesn't have all of the + * required relocations in place. + */ + if (need_flush) + intel_batchbuffer_flush(intel->batch); +#endif + return GL_TRUE; } -- cgit v1.2.3