From f75843a517bd188639e6866db2a7b04de3524e16 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 24 Aug 2008 17:59:10 +1000 Subject: Revert "Revert "Merge branch 'drm-gem'"" This reverts commit 7c81124d7c4a4d1da9f48cbf7e82ab1a3a970a7a. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 134 +-- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 24 +- src/mesa/drivers/dri/intel/intel_blit.c | 113 ++- src/mesa/drivers/dri/intel/intel_blit.h | 8 +- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 4 +- src/mesa/drivers/dri/intel/intel_buffers.c | 2 + src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 1122 --------------------- src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h | 28 - src/mesa/drivers/dri/intel/intel_context.c | 111 +- src/mesa/drivers/dri/intel/intel_context.h | 14 +- src/mesa/drivers/dri/intel/intel_decode.c | 22 +- src/mesa/drivers/dri/intel/intel_depthstencil.c | 80 +- src/mesa/drivers/dri/intel/intel_fbo.c | 20 +- src/mesa/drivers/dri/intel/intel_fbo.h | 6 +- src/mesa/drivers/dri/intel/intel_ioctl.c | 82 +- src/mesa/drivers/dri/intel/intel_ioctl.h | 16 +- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 7 +- src/mesa/drivers/dri/intel/intel_pixel_copy.c | 10 +- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 16 +- src/mesa/drivers/dri/intel/intel_reg.h | 129 +++ src/mesa/drivers/dri/intel/intel_regions.c | 162 ++- src/mesa/drivers/dri/intel/intel_regions.h | 13 +- src/mesa/drivers/dri/intel/intel_screen.c | 115 +-- src/mesa/drivers/dri/intel/intel_screen.h | 2 + src/mesa/drivers/dri/intel/intel_span.c | 676 ++++++++++--- src/mesa/drivers/dri/intel/intel_span.h | 6 +- src/mesa/drivers/dri/intel/intel_tex_copy.c | 6 +- src/mesa/drivers/dri/intel/intel_tex_image.c | 23 +- src/mesa/drivers/dri/intel/intel_tex_validate.c | 14 - 29 files changed, 1162 insertions(+), 1803 deletions(-) delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h (limited to 'src/mesa/drivers/dri/intel') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a594fb6cc4..5afaad070c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -29,6 +29,7 @@ #include "intel_ioctl.h" #include "intel_decode.h" #include "intel_reg.h" +#include "intel_bufmgr.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -78,19 +79,21 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } + if (!batch->buffer && intel->ttm == GL_TRUE) + batch->buffer = malloc (intel->maxBatchSize); + batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); - dri_bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; + intel->maxBatchSize, 4096); + if (batch->buffer) + batch->map = batch->buffer; + else { + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + } batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; batch->cliprect_mode = IGNORE_CLIPRECTS; - - /* account batchbuffer in aperture */ - dri_bufmgr_check_aperture_space(batch->buf); - } struct intel_batchbuffer * @@ -99,7 +102,6 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; - batch->last_fence = NULL; intel_batchbuffer_reset(batch); return batch; @@ -108,14 +110,13 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->last_fence) { - dri_fence_wait(batch->last_fence); - dri_fence_unreference(batch->last_fence); - batch->last_fence = NULL; - } - if (batch->map) { - dri_bo_unmap(batch->buf); - batch->map = NULL; + if (batch->buffer) + free (batch->buffer); + else { + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } } dri_bo_unreference(batch->buf); batch->buf = NULL; @@ -131,11 +132,12 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - void *start; - GLuint count; + int ret = 0; - dri_bo_unmap(batch->buf); - start = dri_process_relocs(batch->buf, &count); + if (batch->buffer) + dri_bo_subdata (batch->buf, 0, used, batch->buffer); + else + dri_bo_unmap(batch->buf); batch->map = NULL; batch->ptr = NULL; @@ -148,21 +150,25 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { - intel_exec_ioctl(batch->intel, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock, - start, count, &batch->last_fence); + struct drm_i915_gem_execbuffer *execbuf; + + execbuf = dri_process_relocs(batch->buf); + ret = intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + execbuf); } else { - intel_batch_ioctl(batch->intel, - batch->buf->offset, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock); + dri_process_relocs(batch->buf); + ret = intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); } } - - dri_post_submit(batch->buf, &batch->last_fence); + + dri_post_submit(batch->buf); if (intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS) { @@ -187,6 +193,10 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.debug_batch(intel); } + if (ret != 0) { + UNLOCK_HARDWARE(intel); + exit(1); + } intel->vtbl.new_batch(intel); } @@ -204,21 +214,27 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = 0; - ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; - used += 12; + + /* Emit a flush if the bufmgr doesn't do it for us. */ + if (!intel->ttm) { + *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + batch->ptr += 4; + used = batch->ptr - batch->map; } - else { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; - used += 8; + + /* Round batchbuffer usage to 2 DWORDs. */ + + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; } + /* Mark the end of the buffer. */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a * flush. What happens is that when we try to grab the lock for @@ -230,6 +246,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, * avoid that in the first place. */ batch->ptr = batch->map; + if (intel->vtbl.finish_batch) + intel->vtbl.finish_batch(intel); + /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ @@ -242,9 +261,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { + int irq; + fprintf(stderr, "waiting for idle\n"); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); + LOCK_HARDWARE(intel); + irq = intelEmitIrqLocked(intel); + UNLOCK_HARDWARE(intel); + intelWaitIrq(intel, irq); } /* Reset the buffer: @@ -252,25 +275,22 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, intel_batchbuffer_reset(batch); } -void -intel_batchbuffer_finish(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_flush(batch); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); -} - /* This is the only way buffers get added to the validate list. */ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint delta) + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) { int ret; - ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain, + delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 0da602010e..52d6ecc223 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -4,6 +4,7 @@ #include "mtypes.h" #include "dri_bufmgr.h" +#include "intel_reg.h" struct intel_context; @@ -40,7 +41,8 @@ struct intel_batchbuffer struct intel_context *intel; dri_bo *buf; - dri_fence *last_fence; + + GLubyte *buffer; GLubyte *map; GLubyte *ptr; @@ -58,8 +60,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context void intel_batchbuffer_free(struct intel_batchbuffer *batch); -void intel_batchbuffer_finish(struct intel_batchbuffer *batch); - void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line); @@ -82,14 +82,16 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint offset); + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of * work... */ -static INLINE GLuint +static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); @@ -136,12 +138,20 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_RELOC(buf, cliprect_mode, delta) do { \ +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ } while (0) #define ADVANCE_BATCH() do { } while(0) +static INLINE void +intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + intel_batchbuffer_emit_dword(batch, MI_FLUSH); +} + #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 25ac609f13..7129a4ba91 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -54,7 +54,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, struct intel_context *intel; const intelScreenPrivate *intelScreen; - int ret; DBG("%s\n", __FUNCTION__); @@ -66,14 +65,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, intelScreen = intel->intelScreen; - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. */ @@ -89,6 +80,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, unsigned short src_x, src_y; int BR13, CMD; int i; + dri_bo *aper_array[3]; src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); @@ -114,26 +106,28 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #ifndef I915 - if (src->tiled) { + if (src->tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; src_pitch /= 4; } - if (dst->tiled) { + if (dst->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } #endif /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, 8 * 4, + REFERENCES_CLIPRECTS); again: - ret = dri_bufmgr_check_aperture_space(dst->buffer); - ret |= dri_bufmgr_check_aperture_space(src->buffer); - - if (ret) { + aper_array[0] = intel->batch->buf; + aper_array[1] = dst->buffer; + aper_array[2] = src->buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { intel_batchbuffer_flush(intel->batch); goto again; } - + for (i = 0; i < nbox; i++, pbox++) { drm_clip_rect_t box = *pbox; @@ -157,19 +151,22 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); + OUT_RELOC(dst->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(src->buffer, + I915_GEM_DOMAIN_RENDER, 0, + 0); ADVANCE_BATCH(); } - if (intel->first_swap_fence) - dri_fence_unreference(intel->first_swap_fence); + /* Flush the rendering and the batch so that the results all land on the + * screen in a timely fashion. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); intel_batchbuffer_flush(intel->batch); - intel->first_swap_fence = intel->batch->last_fence; - if (intel->first_swap_fence) - dri_fence_reference(intel->first_swap_fence); } UNLOCK_HARDWARE(intel); @@ -184,7 +181,7 @@ intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color) @@ -209,7 +206,7 @@ intelEmitFillBlit(struct intel_context *intel, return; } #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } @@ -226,7 +223,9 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); } @@ -263,11 +262,11 @@ intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - GLboolean src_tiled, + uint32_t src_tiling, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -276,17 +275,19 @@ intelEmitCopyBlit(struct intel_context *intel, GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - int ret; + dri_bo *aper_array[3]; BATCH_LOCALS; /* do space/cliprects check before going any further */ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); again: - ret = dri_bufmgr_check_aperture_space(dst_buffer); - ret |= dri_bufmgr_check_aperture_space(src_buffer); - if (ret) { - intel_batchbuffer_flush(intel->batch); - goto again; + aper_array[0] = intel->batch->buf; + aper_array[1] = dst_buffer; + aper_array[2] = src_buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + intel_batchbuffer_flush(intel->batch); + goto again; } DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", @@ -315,11 +316,11 @@ intelEmitCopyBlit(struct intel_context *intel, } #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } - if (src_tiled) { + if (src_tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; src_pitch /= 4; } @@ -345,11 +346,13 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, src_offset); ADVANCE_BATCH(); } @@ -362,14 +365,17 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } + intel_batchbuffer_emit_mi_flush(intel->batch); } @@ -513,7 +519,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiled) { + if (irb_region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -541,7 +547,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(write_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -549,7 +556,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } } - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -563,7 +570,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op) @@ -587,13 +594,13 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, (8 * 4) + (3 * 4) + dwords, - NO_LOOP_CLIPRECTS ); + REFERENCES_CLIPRECTS ); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { opcode |= XY_DST_TILED; dst_pitch /= 4; } @@ -606,15 +613,17 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, br13 |= BR13_8888; blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiled) + if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS); + BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ @@ -627,5 +636,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_data( intel->batch, src_bits, dwords * 4, - NO_LOOP_CLIPRECTS ); + REFERENCES_CLIPRECTS ); + + intel_batchbuffer_emit_mi_flush(intel->batch); } diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index fc0620caba..0881cc4fdc 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -42,11 +42,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - GLboolean src_tiled, + uint32_t src_tiling, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, GLshort w, GLshort h, @@ -57,7 +57,7 @@ extern void intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color); @@ -69,7 +69,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 951b8cbfb7..1923a21516 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -32,6 +32,7 @@ #include "intel_context.h" #include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" #include "intel_regions.h" #include "dri_bufmgr.h" @@ -45,8 +46,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, struct intel_buffer_object *intel_obj) { intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + intel_obj->Base.Size, 64); } /** diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 75542a9775..77352e6a53 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -819,6 +819,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } + drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); + } else { /* XXX this shouldn't be an error but we can't handle it for now */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c deleted file mode 100644 index 194814e8fb..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ /dev/null @@ -1,1122 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - * Dave Airlie - */ - -#include -#include -#include -#include -#include -#include - -#include "errno.h" -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "string.h" -#include "imports.h" - -#include "i915_drm.h" - -#include "intel_bufmgr_ttm.h" -#ifdef TTM_API - -#define DBG(...) do { \ - if (bufmgr_ttm->bufmgr.debug) \ - fprintf(stderr, __VA_ARGS__); \ -} while (0) - -/* - * These bits are always specified in each validation - * request. Other bits are not supported at this point - * as it would require a bit of investigation to figure - * out what mask value should be used. - */ -#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ - DRM_BO_FLAG_READ | \ - DRM_BO_FLAG_WRITE | \ - DRM_BO_FLAG_EXE) - -struct intel_validate_entry { - dri_bo *bo; - struct drm_i915_op_arg bo_arg; -}; - -struct dri_ttm_bo_bucket_entry { - drmBO drm_bo; - struct dri_ttm_bo_bucket_entry *next; -}; - -struct dri_ttm_bo_bucket { - struct dri_ttm_bo_bucket_entry *head; - struct dri_ttm_bo_bucket_entry **tail; - /** - * Limit on the number of entries in this bucket. - * - * 0 means that this caching at this bucket size is disabled. - * -1 means that there is no limit to caching at this size. - */ - int max_entries; - int num_entries; -}; - -/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse - * is 1 << 16 pages, or 256MB. - */ -#define INTEL_TTM_BO_BUCKETS 16 -typedef struct _dri_bufmgr_ttm { - dri_bufmgr bufmgr; - - int fd; - unsigned int fence_type; - unsigned int fence_type_flush; - - uint32_t max_relocs; - - struct intel_validate_entry *validate_array; - int validate_array_size; - int validate_count; - - /** Array of lists of cached drmBOs of power-of-two sizes */ - struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; -} dri_bufmgr_ttm; - -/** - * Private information associated with a relocation that isn't already stored - * in the relocation buffer to be passed to the kernel. - */ -struct dri_ttm_reloc { - dri_bo *target_buf; - uint64_t validate_flags; - /** Offset of target_buf after last execution of this relocation entry. */ - unsigned int last_target_offset; -}; - -typedef struct _dri_bo_ttm { - dri_bo bo; - - int refcount; - unsigned int map_count; - drmBO drm_bo; - const char *name; - - uint64_t last_flags; - - /** - * Index of the buffer within the validation list while preparing a - * batchbuffer execution. - */ - int validate_index; - - /** DRM buffer object containing relocation list */ - uint32_t *reloc_buf_data; - struct dri_ttm_reloc *relocs; - - /** - * Indicates that the buffer may be shared with other processes, so we - * can't hold maps beyond when the user does. - */ - GLboolean shared; - - GLboolean delayed_unmap; - /* Virtual address from the dri_bo_map whose unmap was delayed. */ - void *saved_virtual; -} dri_bo_ttm; - -typedef struct _dri_fence_ttm -{ - dri_fence fence; - - int refcount; - const char *name; - drmFence drm_fence; -} dri_fence_ttm; - -static int -logbase2(int n) -{ - GLint i = 1; - GLint log2 = 0; - - while (n > i) { - i *= 2; - log2++; - } - - return log2; -} - -static struct dri_ttm_bo_bucket * -dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) -{ - int i; - - /* We only do buckets in power of two increments */ - if ((size & (size - 1)) != 0) - return NULL; - - /* We should only see sizes rounded to pages. */ - assert((size % 4096) == 0); - - /* We always allocate in units of pages */ - i = ffs(size / 4096) - 1; - if (i >= INTEL_TTM_BO_BUCKETS) - return NULL; - - return &bufmgr_ttm->cache_bucket[i]; -} - - -static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) -{ - int i, j; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - if (bo_ttm->reloc_buf_data != NULL) { - for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { - uint32_t *reloc_entry = bo_ttm->reloc_buf_data + - I915_RELOC_HEADER + - j * I915_RELOC0_STRIDE; - dri_bo *target_bo = bo_ttm->relocs[j].target_buf; - dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; - - DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", - i, - bo_ttm->name, reloc_entry[0], - target_ttm->name, target_bo->offset, - reloc_entry[1]); - } - } else { - DBG("%2d: %s\n", i, bo_ttm->name); - } - } -} - -/** - * Adds the given buffer to the list of buffers to be validated (moved into the - * appropriate memory type) with the next batch submission. - * - * If a buffer is validated multiple times in a batch submission, it ends up - * with the intersection of the memory type flags and the union of the - * access flags. - */ -static void -intel_add_validate_buffer(dri_bo *buf, - uint64_t flags) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, - * do that now. - */ - if (ttm_buf->delayed_unmap) { - drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - ttm_buf->delayed_unmap = GL_FALSE; - } - - if (ttm_buf->validate_index == -1) { - struct intel_validate_entry *entry; - struct drm_i915_op_arg *arg; - struct drm_bo_op_req *req; - int index; - - /* Extend the array of validation entries as necessary. */ - if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { - int i, new_size = bufmgr_ttm->validate_array_size * 2; - - if (new_size == 0) - new_size = 5; - - bufmgr_ttm->validate_array = - realloc(bufmgr_ttm->validate_array, - sizeof(struct intel_validate_entry) * new_size); - bufmgr_ttm->validate_array_size = new_size; - - /* Update pointers for realloced mem. */ - for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { - bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) - &bufmgr_ttm->validate_array[i + 1].bo_arg; - } - } - - /* Pick out the new array entry for ourselves */ - index = bufmgr_ttm->validate_count; - ttm_buf->validate_index = index; - entry = &bufmgr_ttm->validate_array[index]; - bufmgr_ttm->validate_count++; - - /* Fill in array entry */ - entry->bo = buf; - dri_bo_reference(buf); - - /* Fill in kernel arg */ - arg = &entry->bo_arg; - req = &arg->d.req; - - memset(arg, 0, sizeof(*arg)); - req->bo_req.handle = ttm_buf->drm_bo.handle; - req->op = drm_bo_validate; - req->bo_req.flags = flags; - req->bo_req.hint = 0; -#ifdef DRM_BO_HINT_PRESUMED_OFFSET - /* PRESUMED_OFFSET indicates that all relocations pointing at this - * buffer have the correct offset. If any of our relocations don't, - * this flag will be cleared off the buffer later in the relocation - * processing. - */ - req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; - req->bo_req.presumed_offset = buf->offset; -#endif - req->bo_req.mask = INTEL_BO_MASK; - req->bo_req.fence_class = 0; /* Backwards compat. */ - - if (ttm_buf->reloc_buf_data != NULL) - arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; - else - arg->reloc_ptr = 0; - - /* Hook up the linked list of args for the kernel */ - arg->next = 0; - if (index != 0) { - bufmgr_ttm->validate_array[index - 1].bo_arg.next = - (unsigned long)arg; - } - } else { - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[ttm_buf->validate_index]; - struct drm_i915_op_arg *arg = &entry->bo_arg; - struct drm_bo_op_req *req = &arg->d.req; - uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; - uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; - - /* Buffer was already in the validate list. Extend its flags as - * necessary. - */ - - if (memFlags == 0) { - fprintf(stderr, - "%s: No shared memory types between " - "0x%16llx and 0x%16llx\n", - __FUNCTION__, req->bo_req.flags, flags); - abort(); - } - if (flags & ~INTEL_BO_MASK) { - fprintf(stderr, - "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", - __FUNCTION__, flags & ~INTEL_BO_MASK); - abort(); - } - req->bo_req.flags = memFlags | modeFlags; - } -} - - -#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ - sizeof(uint32_t)) - -static int -intel_setup_reloc_list(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - - bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, - sizeof(struct dri_ttm_reloc)); - bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); - - /* Initialize the relocation list with the header: - * DWORD 0: relocation count - * DWORD 1: relocation type - * DWORD 2+3: handle to next relocation list (currently none) 64-bits - */ - bo_ttm->reloc_buf_data[0] = 0; - bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; - bo_ttm->reloc_buf_data[2] = 0; - bo_ttm->reloc_buf_data[3] = 0; - - return 0; -} - -#if 0 -int -driFenceSignaled(DriFenceObject * fence, unsigned type) -{ - int signaled; - int ret; - - if (fence == NULL) - return GL_TRUE; - - ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); - BM_CKFATAL(ret); - return signaled; -} -#endif - -static dri_bo * -dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - unsigned int pageSize = getpagesize(); - int ret; - uint64_t flags; - unsigned int hint; - unsigned long alloc_size; - struct dri_ttm_bo_bucket *bucket; - GLboolean alloc_from_cache = GL_FALSE; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - /* The mask argument doesn't do anything for us that we want other than - * determine which pool (TTM or local) the buffer is allocated into, so - * just pass all of the allocation class flags. - */ - flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_EXE; - /* No hints we want to use. */ - hint = 0; - - /* Round the allocated size up to a power of two number of pages. */ - alloc_size = 1 << logbase2(size); - if (alloc_size < pageSize) - alloc_size = pageSize; - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL || bucket->max_entries == 0) - alloc_size = size; - - /* Get a buffer out of the cache if available */ - if (bucket != NULL && bucket->num_entries > 0) { - struct dri_ttm_bo_bucket_entry *entry = bucket->head; - int busy; - - /* Check if the buffer is still in flight. If not, reuse it. */ - ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); - alloc_from_cache = (ret == 0 && busy == 0); - - if (alloc_from_cache) { - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - ttm_buf->drm_bo = entry->drm_bo; - free(entry); - } - } - - if (!alloc_from_cache) { - ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, - NULL, flags, hint, &ttm_buf->drm_bo); - if (ret != 0) { - free(ttm_buf); - return NULL; - } - } - - ttm_buf->bo.size = size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_FALSE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); - - return &ttm_buf->bo; -} - -/* Our TTM backend doesn't allow creation of static buffers, as that requires - * privelege for the non-fake case, and the lock in the fake case where we were - * working around the X Server not creating buffers and passing handles to us. - */ -static dri_bo * -dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, void *virtual, - uint64_t location_mask) -{ - return NULL; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_bo * -intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - int ret; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(-ret)); - free(ttm_buf); - return NULL; - } - ttm_buf->bo.size = ttm_buf->drm_bo.size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_TRUE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create_from_handle: %p %08x (%s)\n", - &ttm_buf->bo, handle, ttm_buf->name); - - return &ttm_buf->bo; -} - -static void -dri_ttm_bo_reference(dri_bo *buf) -{ - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - ttm_buf->refcount++; -} - -static void -dri_ttm_bo_unreference(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - if (!buf) - return; - - if (--ttm_buf->refcount == 0) { - struct dri_ttm_bo_bucket *bucket; - int ret; - - assert(ttm_buf->map_count == 0); - - if (ttm_buf->reloc_buf_data) { - int i; - - /* Unreference all the target buffers */ - for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) - dri_bo_unreference(ttm_buf->relocs[i].target_buf); - free(ttm_buf->relocs); - - /* Free the kernel BO containing relocation entries */ - free(ttm_buf->reloc_buf_data); - ttm_buf->reloc_buf_data = NULL; - } - - if (ttm_buf->delayed_unmap) { - int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - } - - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (!ttm_buf->shared && - bucket != NULL && - (bucket->max_entries == -1 || - (bucket->max_entries > 0 && - bucket->num_entries < bucket->max_entries))) - { - struct dri_ttm_bo_bucket_entry *entry; - - entry = calloc(1, sizeof(*entry)); - entry->drm_bo = ttm_buf->drm_bo; - - entry->next = NULL; - *bucket->tail = entry; - bucket->tail = &entry->next; - bucket->num_entries++; - } else { - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed (%s): %s\n", - ttm_buf->name, strerror(-ret)); - } - } - - DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - free(buf); - return; - } -} - -static int -dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - uint64_t flags; - int ret; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - flags = DRM_BO_FLAG_READ; - if (write_enable) - flags |= DRM_BO_FLAG_WRITE; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops. - */ - if (ttm_buf->map_count++ != 0) - return 0; - - assert(buf->virtual == NULL); - - DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - /* XXX: What about if we're upgrading from READ to WRITE? */ - if (ttm_buf->delayed_unmap) { - buf->virtual = ttm_buf->saved_virtual; - return 0; - } - - ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -static int -dri_ttm_bo_unmap(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - int ret; - - if (buf == NULL) - return 0; - - assert(ttm_buf->map_count != 0); - if (--ttm_buf->map_count != 0) - return 0; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - assert(buf->virtual != NULL); - - DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - if (!ttm_buf->shared) { - ttm_buf->saved_virtual = buf->virtual; - ttm_buf->delayed_unmap = GL_TRUE; - buf->virtual = NULL; - - return 0; - } - - buf->virtual = NULL; - - ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_fence * -intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_fence_ttm *ttm_fence; - - ttm_fence = malloc(sizeof(*ttm_fence)); - if (!ttm_fence) - return NULL; - - ttm_fence->drm_fence.handle = arg->handle; - ttm_fence->drm_fence.fence_class = arg->fence_class; - ttm_fence->drm_fence.type = arg->type; - ttm_fence->drm_fence.flags = arg->flags; - ttm_fence->drm_fence.signaled = 0; - ttm_fence->drm_fence.sequence = arg->sequence; - - ttm_fence->fence.bufmgr = bufmgr; - ttm_fence->name = name; - ttm_fence->refcount = 1; - - DBG("fence_create_from_handle: %p (%s)\n", - &ttm_fence->fence, ttm_fence->name); - - return &ttm_fence->fence; -} - - -static void -dri_ttm_fence_reference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - ++fence_ttm->refcount; - DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_ttm_fence_unreference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - if (!fence) - return; - - DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); - - if (--fence_ttm->refcount == 0) { - int ret; - - ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); - if (ret != 0) { - fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", - fence_ttm->name, strerror(-ret)); - } - - free(fence); - return; - } -} - -static void -dri_ttm_fence_wait(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - int ret; - - ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", - __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); - abort(); - } - - DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - free(bufmgr_ttm->validate_array); - - /* Free any cached buffer objects we were going to reuse */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; - struct dri_ttm_bo_bucket_entry *entry; - - while ((entry = bucket->head) != NULL) { - int ret; - - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed: %s\n", - strerror(-ret)); - } - - free(entry); - } - } - - free(bufmgr); -} - -/** - * Adds the target buffer to the validation list and adds the relocation - * to the reloc_buffer's relocation list. - * - * The relocation entry at the given offset must already contain the - * precomputed relocation value, because the kernel will optimize out - * the relocation entry write when the buffer hasn't moved from the - * last known offset in target_buf. - */ -static int -dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; - dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; - int num_relocs; - uint32_t *this_reloc; - - /* Create a new relocation list if needed */ - if (reloc_buf_ttm->reloc_buf_data == NULL) - intel_setup_reloc_list(reloc_buf); - - num_relocs = reloc_buf_ttm->reloc_buf_data[0]; - - /* Check overflow */ - assert(num_relocs < bufmgr_ttm->max_relocs); - - this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + - num_relocs * I915_RELOC0_STRIDE; - - this_reloc[0] = offset; - this_reloc[1] = delta; - this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ - this_reloc[3] = 0; - - reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; - reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; - dri_bo_reference(target_buf); - - reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ - /* Check wraparound */ - assert(reloc_buf_ttm->reloc_buf_data[0] != 0); - return 0; -} - -/** - * Walk the tree of relocations rooted at BO and accumulate the list of - * validations to be performed and update the relocation buffers with - * index values into the validation list. - */ -static void -dri_ttm_bo_process_reloc(dri_bo *bo) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_process_reloc(r->target_buf); - - /* Add the target to the validate list */ - intel_add_validate_buffer(r->target_buf, r->validate_flags); - - /* Clear the PRESUMED_OFFSET flag from the validate list entry of the - * target if this buffer has a stale relocated pointer at it. - */ - if (r->last_target_offset != r->target_buf->offset) { - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; - - entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; - } - } -} - -static void * -dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - - /* Update indices and set up the validate list. */ - dri_ttm_bo_process_reloc(batch_buf); - - /* Add the batch buffer to the validation list. There are no relocations - * pointing to it. - */ - intel_add_validate_buffer(batch_buf, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); - - *count = bufmgr_ttm->validate_count; - return &bufmgr_ttm->validate_array[0].bo_arg; -} - -static const char * -intel_get_flags_mem_type_string(uint64_t flags) -{ - switch (flags & DRM_BO_MASK_MEM) { - case DRM_BO_FLAG_MEM_LOCAL: return "local"; - case DRM_BO_FLAG_MEM_TT: return "ttm"; - case DRM_BO_FLAG_MEM_VRAM: return "vram"; - case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; - case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; - case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; - case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; - case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; - default: return NULL; - } -} - -static const char * -intel_get_flags_caching_string(uint64_t flags) -{ - switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { - case 0: return "UU"; - case DRM_BO_FLAG_CACHED: return "CU"; - case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; - case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; - default: return NULL; - } -} - -static void -intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) -{ - int i; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; - struct drm_bo_arg_rep *rep = &arg->d.rep; - - /* Update the flags */ - if (rep->bo_info.flags != bo_ttm->last_flags) { - DBG("BO %s migrated: %s/%s -> %s/%s\n", - bo_ttm->name, - intel_get_flags_mem_type_string(bo_ttm->last_flags), - intel_get_flags_caching_string(bo_ttm->last_flags), - intel_get_flags_mem_type_string(rep->bo_info.flags), - intel_get_flags_caching_string(rep->bo_info.flags)); - - bo_ttm->last_flags = rep->bo_info.flags; - } - /* Update the buffer offset */ - if (rep->bo_info.offset != bo->offset) { - DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", - bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); - bo->offset = rep->bo_info.offset; - } - } -} - -/** - * Update the last target offset field of relocation entries for PRESUMED_OFFSET - * computation. - */ -static void -dri_ttm_bo_post_submit(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_post_submit(r->target_buf); - - r->last_target_offset = r->target_buf->offset; - } -} - -static void -dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - int i; - - intel_update_buffer_offsets (bufmgr_ttm); - - dri_ttm_bo_post_submit(batch_buf); - - if (bufmgr_ttm->bufmgr.debug) - dri_ttm_dump_validation_list(bufmgr_ttm); - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - /* Disconnect the buffer from the validate list */ - bo_ttm->validate_index = -1; - dri_bo_unreference(bo); - bufmgr_ttm->validate_array[i].bo = NULL; - } - bufmgr_ttm->validate_count = 0; -} - -/** - * Enables unlimited caching of buffer objects for reuse. - * - * This is potentially very memory expensive, as the cache at each bucket - * size is only bounded by how many buffers of that size we've managed to have - * in flight at once. - */ -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - bufmgr_ttm->cache_bucket[i].max_entries = -1; - } -} - -/* - * - */ -static int -dri_ttm_check_aperture_space(dri_bo *bo) -{ - return 0; -} - -/** - * Initializes the TTM buffer manager, which uses the kernel to allocate, map, - * and manage map buffer objections. - * - * \param fd File descriptor of the opened DRM device. - * \param fence_type Driver-specific fence type used for fences with no flush. - * \param fence_type_flush Driver-specific fence type used for fences with a - * flush. - */ -dri_bufmgr * -intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size) -{ - dri_bufmgr_ttm *bufmgr_ttm; - int i; - - bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); - bufmgr_ttm->fd = fd; - bufmgr_ttm->fence_type = fence_type; - bufmgr_ttm->fence_type_flush = fence_type_flush; - - /* Let's go with one relocation per every 2 dwords (but round down a bit - * since a power of two will mean an extra page allocation for the reloc - * buffer). - * - * Every 4 was too few for the blender benchmark. - */ - bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - - bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; - bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; - bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; - bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; - bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; - bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; - bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; - bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; - bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; - bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; - bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; - bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; - bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; - bufmgr_ttm->bufmgr.debug = GL_FALSE; - bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; - /* Initialize the linked lists for BO reuse cache. */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) - bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; - - return &bufmgr_ttm->bufmgr; -} -#else -dri_bufmgr * -intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size) -{ - return NULL; -} - -dri_bo * -intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - return NULL; -} - -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) -{ -} -#endif diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h deleted file mode 100644 index f5bd64c90f..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h +++ /dev/null @@ -1,28 +0,0 @@ - -#ifndef INTEL_BUFMGR_TTM_H -#define INTEL_BUFMGR_TTM_H - -#include "dri_bufmgr.h" - -extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle); - -#ifdef TTM_API -dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg); -#endif - - -dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size); - -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); - -#ifndef TTM_API -#define DRM_I915_FENCE_CLASS_ACCEL 0 -#define DRM_I915_FENCE_TYPE_RW 2 -#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000 -#endif - -#endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 5fa9d95e11..50c1964d87 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,7 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -96,11 +96,13 @@ int INTEL_DEBUG = (0); #include "extension_helper.h" -#define DRIVER_DATE "20061102" +#define DRIVER_DATE "20080716" +#define DRIVER_DATE_GEM "GEM " DRIVER_DATE static const GLubyte * intelGetString(GLcontext * ctx, GLenum name) { + const struct intel_context *const intel = intel_context(ctx); const char *chipset; static char buffer[128]; @@ -110,7 +112,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; case GL_RENDERER: - switch (intel_context(ctx)->intelScreen->deviceID) { + switch (intel->intelScreen->deviceID) { case PCI_CHIP_845_G: chipset = "Intel(R) 845G"; break; @@ -183,7 +185,9 @@ intelGetString(GLcontext * ctx, GLenum name) break; } - (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0); + (void) driGetRendererString(buffer, chipset, + (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE, + 0); return (GLubyte *) buffer; default: @@ -367,22 +371,34 @@ intelFlush(GLcontext * ctx) if (!IS_965(intel->intelScreen->deviceID)) INTEL_FIREVERTICES(intel); + /* Emit a flush so that any frontbuffer rendering that might have occurred + * lands onscreen in a timely manner, even if the X Server doesn't trigger + * a flush for us. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); + if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); - - /* XXX: Need to do an MI_FLUSH here. - */ } void intelFinish(GLcontext * ctx) { - struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + int i; + intelFlush(ctx); - if (intel->batch->last_fence) { - dri_fence_wait(intel->batch->last_fence); - dri_fence_unreference(intel->batch->last_fence); - intel->batch->last_fence = NULL; + + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb; + + irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb->region) + dri_bo_wait_rendering(irb->region->buffer); + } + if (fb->_DepthBuffer) { + /* XXX: Wait on buffer idle */ } } @@ -448,28 +464,32 @@ static GLboolean intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; - GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; - GLboolean ttm_supported; + GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; + int gem_kernel = 0; + GLboolean gem_supported; + struct drm_i915_getparam gp; + + gp.param = I915_PARAM_HAS_GEM; + gp.value = &gem_kernel; - /* If we've got a new enough DDX that's initializing TTM and giving us + (void) drmCommandWriteRead(intel->driFd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + + /* If we've got a new enough DDX that's initializing GEM and giving us * object handles for the shared buffers, use that. */ intel->ttm = GL_FALSE; if (intel->intelScreen->driScrnPriv->dri2.enabled) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && - intel->intelScreen->drmMinor >= 11 && + gem_kernel && intel->intelScreen->front.bo_handle != -1) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else - ttm_supported = GL_FALSE; + gem_supported = GL_FALSE; - if (!ttm_disable && ttm_supported) { + if (!gem_disable && gem_supported) { int bo_reuse_mode; - intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, - DRM_FENCE_TYPE_EXE, - DRM_FENCE_TYPE_EXE | - DRM_I915_FENCE_TYPE_RW, + intel->bufmgr = intel_bufmgr_gem_init(intel->driFd, BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; @@ -479,16 +499,16 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_ttm_enable_bo_reuse(intel->bufmgr); + intel_bufmgr_gem_enable_reuse(intel->bufmgr); break; } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { - if (ttm_disable) { - fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); + if (gem_disable) { + fprintf(stderr, "GEM disabled. Using classic.\n"); } else { - fprintf(stderr, "Failed to initialize TTM buffer manager. " + fprintf(stderr, "Failed to initialize GEM. " "Falling back to classic.\n"); } @@ -498,14 +518,17 @@ intel_init_bufmgr(struct intel_context *intel) return GL_FALSE; } - intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - intel_fence_emit, - intel_fence_wait, - intel); + intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); } + /* XXX bufmgr should be per-screen, not per-context */ + intelScreen->ttm = intel->ttm; + return GL_TRUE; } @@ -673,8 +696,6 @@ intelInitContext(struct intel_context *intel, intel_recreate_static_regions(intel); intel->batch = intel_batchbuffer_alloc(intel); - intel->last_swap_fence = NULL; - intel->first_swap_fence = NULL; intel_bufferobj_init(intel); intel_fbo_init(intel); @@ -692,7 +713,6 @@ intelInitContext(struct intel_context *intel, /* Force all software fallbacks */ if (driQueryOptionb(&intel->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D rasterization\n"); - FALLBACK(intel, INTEL_FALLBACK_USER, 1); intel->no_rast = 1; } @@ -727,17 +747,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); - - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - if (intel->first_swap_fence) { - dri_fence_wait(intel->first_swap_fence); - dri_fence_unreference(intel->first_swap_fence); - intel->first_swap_fence = NULL; - } + free(intel->prim.vb); if (release_texture_heaps) { /* This share group is about to go away, free our private @@ -889,7 +899,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags) */ if (!intel->ttm && sarea->texAge != intel->hHWContext) { sarea->texAge = intel->hHWContext; - dri_bufmgr_fake_contended_lock_take(intel->bufmgr); + intel_bufmgr_fake_contended_lock_take(intel->bufmgr); if (INTEL_DEBUG & DEBUG_BATCH) intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) @@ -1017,6 +1027,7 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) * Nothing should be left in batch outside of LOCK/UNLOCK which references * cliprects. */ - assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS); + if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS) + intel_batchbuffer_flush(intel->batch); } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index df79ab8897..f9a373cf74 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -35,6 +35,7 @@ #include "mm.h" #include "texmem.h" #include "dri_bufmgr.h" +#include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" @@ -85,6 +86,7 @@ struct intel_context { void (*destroy) (struct intel_context * intel); void (*emit_state) (struct intel_context * intel); + void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -174,9 +176,6 @@ struct intel_context */ GLboolean ttm; - dri_fence *last_swap_fence; - dri_fence *first_swap_fence; - struct intel_batchbuffer *batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -184,9 +183,13 @@ struct intel_context struct { GLuint id; - GLuint primitive; - GLubyte *start_ptr; + uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); + dri_bo *vb_bo; + uint8_t *vb; + unsigned int start_offset; /**< Byte offset of primitive sequence */ + unsigned int current_offset; /**< Byte offset of next vertex */ + unsigned int count; /**< Number of vertices in current primitive */ } prim; GLuint stats_wm; @@ -291,6 +294,7 @@ extern char *__progname; #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define INTEL_FIREVERTICES(intel) \ diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index a1240639f4..9c105013c0 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -183,9 +183,10 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) switch ((data[0] & 0x1fc00000) >> 22) { case 0x50: instr_out(data, hw_offset, 0, - "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n", + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis"); + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 11) & 1); len = (data[0] & 0x000000ff) + 2; if (len != 6) @@ -210,7 +211,8 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, pitch %d, " "clipping %sabled\n", format, - data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "(%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "(%d,%d)\n", @@ -220,9 +222,12 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x53: instr_out(data, hw_offset, 0, - "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n", + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis"); + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); len = (data[0] & 0x000000ff) + 2; if (len != 8) @@ -247,16 +252,17 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " "clipping %sabled\n", format, - data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "dst (%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "dst (%d,%d)\n", - data[2] & 0xffff, data[2] >> 16); + data[3] & 0xffff, data[3] >> 16); instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); instr_out(data, hw_offset, 5, "src (%d,%d)\n", data[5] & 0xffff, data[5] >> 16); instr_out(data, hw_offset, 6, "src pitch %d\n", - data[6] & 0xffff); + (short)(data[6] & 0xffff)); instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); return len; } diff --git a/src/mesa/drivers/dri/intel/intel_depthstencil.c b/src/mesa/drivers/dri/intel/intel_depthstencil.c index 90baecd8c2..70ba68e9e3 100644 --- a/src/mesa/drivers/dri/intel/intel_depthstencil.c +++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c @@ -39,7 +39,7 @@ #include "intel_fbo.h" #include "intel_depthstencil.h" #include "intel_regions.h" - +#include "intel_span.h" /** * The GL_EXT_framebuffer_object allows the user to create their own @@ -86,68 +86,33 @@ * */ - - -static void -map_regions(GLcontext * ctx, - struct intel_renderbuffer *depthRb, - struct intel_renderbuffer *stencilRb) -{ - struct intel_context *intel = intel_context(ctx); - if (depthRb && depthRb->region) { - intel_region_map(intel, depthRb->region); - depthRb->pfMap = depthRb->region->map; - depthRb->pfPitch = depthRb->region->pitch; - } - if (stencilRb && stencilRb->region) { - intel_region_map(intel, stencilRb->region); - stencilRb->pfMap = stencilRb->region->map; - stencilRb->pfPitch = stencilRb->region->pitch; - } -} - -static void -unmap_regions(GLcontext * ctx, - struct intel_renderbuffer *depthRb, - struct intel_renderbuffer *stencilRb) -{ - struct intel_context *intel = intel_context(ctx); - if (depthRb && depthRb->region) { - intel_region_unmap(intel, depthRb->region); - depthRb->pfMap = NULL; - depthRb->pfPitch = 0; - } - if (stencilRb && stencilRb->region) { - intel_region_unmap(intel, stencilRb->region); - stencilRb->pfMap = NULL; - stencilRb->pfPitch = 0; - } -} - - - /** * Undo the pairing/interleaving between depth and stencil buffers. * irb should be a depth/stencil or stencil renderbuffer. */ void -intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) +intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) { + struct intel_context *intel = intel_context(ctx); + struct gl_renderbuffer *rb = &irb->Base; + if (irb->PairedStencil) { /* irb is a depth/stencil buffer */ struct gl_renderbuffer *stencilRb; struct intel_renderbuffer *stencilIrb; - ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); stencilIrb = intel_renderbuffer(stencilRb); if (stencilIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(stencilIrb->PairedDepth == irb->Base.Name); - map_regions(ctx, irb, stencilIrb); - _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base); - unmap_regions(ctx, irb, stencilIrb); + ASSERT(stencilIrb->PairedDepth == rb->Name); + intel_renderbuffer_map(intel, rb); + intel_renderbuffer_map(intel, stencilRb); + _mesa_extract_stencil(ctx, rb, stencilRb); + intel_renderbuffer_unmap(intel, stencilRb); + intel_renderbuffer_unmap(intel, rb); stencilIrb->PairedDepth = 0; } irb->PairedStencil = 0; @@ -157,17 +122,19 @@ intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) struct gl_renderbuffer *depthRb; struct intel_renderbuffer *depthIrb; - ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || - irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT || + rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); depthIrb = intel_renderbuffer(depthRb); if (depthIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(depthIrb->PairedStencil == irb->Base.Name); - map_regions(ctx, depthIrb, irb); - _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base); - unmap_regions(ctx, depthIrb, irb); + ASSERT(depthIrb->PairedStencil == rb->Name); + intel_renderbuffer_map(intel, rb); + intel_renderbuffer_map(intel, depthRb); + _mesa_extract_stencil(ctx, depthRb, rb); + intel_renderbuffer_unmap(intel, depthRb); + intel_renderbuffer_unmap(intel, rb); depthIrb->PairedStencil = 0; } irb->PairedDepth = 0; @@ -194,6 +161,7 @@ void intel_validate_paired_depth_stencil(GLcontext * ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *depthRb, *stencilRb; depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); @@ -230,9 +198,11 @@ intel_validate_paired_depth_stencil(GLcontext * ctx, stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); /* establish new pairing: interleave stencil into depth buffer */ - map_regions(ctx, depthRb, stencilRb); + intel_renderbuffer_map(intel, &depthRb->Base); + intel_renderbuffer_map(intel, &stencilRb->Base); _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); - unmap_regions(ctx, depthRb, stencilRb); + intel_renderbuffer_unmap(intel, &stencilRb->Base); + intel_renderbuffer_unmap(intel, &depthRb->Base); depthRb->PairedStencil = stencilRb->Base.Name; stencilRb->PairedDepth = depthRb->Base.Name; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index b3f6610546..5bd2ebfdcf 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -153,6 +153,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) intel_unpair_depth_stencil(ctx, irb); } + if (irb->span_cache != NULL) + _mesa_free(irb->span_cache); + if (intel && irb->region) { intel_region_release(&irb->region); } @@ -209,6 +212,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10: case GL_RGB12: case GL_RGB16: + rb->_ActualFormat = GL_RGB8; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 8; + rb->GreenBits = 8; + rb->BlueBits = 8; + rb->AlphaBits = 0; + cpp = 4; + break; case GL_RGBA: case GL_RGBA2: case GL_RGBA4: @@ -294,9 +305,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Width = width; rb->Height = height; - /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); - return GL_TRUE; } } @@ -366,7 +374,6 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, intel_region_reference(&rb->region, region); intel_region_release(&old); - rb->pfMap = region->map; rb->pfPitch = region->pitch; } @@ -446,8 +453,6 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; - /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); return irb; } @@ -519,7 +524,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage) { if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; @@ -558,7 +563,6 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - intel_set_span_functions(&irb->Base); irb->RenderToTexture = GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index c90c84b48c..9d15582d78 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -28,9 +28,9 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H +#include "intel_screen.h" struct intel_context; -struct intel_region; /** * Intel framebuffer, derived from gl_framebuffer. @@ -70,7 +70,6 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; - void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ GLboolean RenderToTexture; /* RTT? */ @@ -80,6 +79,9 @@ struct intel_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ + + uint8_t *span_cache; + unsigned long span_cache_offset; }; extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index f4566ba89c..58c81766cd 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "mtypes.h" #include "context.h" @@ -43,7 +45,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL @@ -104,7 +106,7 @@ intelWaitIrq(struct intel_context *intel, int seq) } -void +int intel_batch_ioctl(struct intel_context *intel, GLuint start_offset, GLuint used, @@ -113,7 +115,7 @@ intel_batch_ioctl(struct intel_context *intel, struct drm_i915_batchbuffer batch; if (intel->no_hw) - return; + return 0; assert(intel->locked); assert(used); @@ -142,82 +144,42 @@ intel_batch_ioctl(struct intel_context *intel, if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + return -errno; } + + return 0; } -#ifdef TTM_API -void +int intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) + struct drm_i915_gem_execbuffer *execbuf) { - struct drm_i915_execbuffer execbuf; - dri_fence *fo; int ret; assert(intel->locked); assert(used); if (intel->no_hw) - return; - - if (*fence) { - dri_fence_unreference(*fence); - } + return 0; - memset(&execbuf, 0, sizeof(execbuf)); - - execbuf.num_buffers = count; - execbuf.batch.used = used; - execbuf.batch.cliprects = intel->pClipRects; - execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf.batch.DR1 = 0; - execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); - - execbuf.ops_list = (unsigned long)start; // TODO - execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + execbuf->batch_start_offset = 0; + execbuf->batch_len = used; + execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; + execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf->DR1 = 0; + execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); do { - ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, - sizeof(execbuf)); + ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf); } while (ret == -EAGAIN); if (ret != 0) { - fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); + return -errno; } - if (execbuf.fence_arg.error != 0) { - - /* - * Fence creation has failed, but the GPU has been - * idled by the kernel. Safe to continue. - */ - - *fence = NULL; - return; - } - - fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", - &execbuf.fence_arg); - if (!fo) { - fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); - UNLOCK_HARDWARE(intel); - exit(1); - } - *fence = fo; -} -#else -void -intel_exec_ioctl(struct intel_context *intel, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) -{ + return 0; } -#endif diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 8674aef723..526e38358c 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -33,14 +33,14 @@ void intelWaitIrq( struct intel_context *intel, int seq ); int intelEmitIrqLocked( struct intel_context *intel ); -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used, - GLboolean ignore_cliprects, - GLboolean allow_unlock ); -void intel_exec_ioctl(struct intel_context *intel, +int intel_batch_ioctl(struct intel_context *intel, + GLuint start_offset, GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence); + GLboolean ignore_cliprects, + GLboolean allow_unlock); +int intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + struct drm_i915_gem_execbuffer *execbuf); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 81238acfe4..7e0d20e681 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -43,7 +43,7 @@ #include "intel_buffer_objects.h" #include "intel_buffers.h" #include "intel_pixel.h" - +#include "intel_reg.h" #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -293,7 +293,7 @@ do_blit_bitmap( GLcontext *ctx, dst->pitch, dst->buffer, 0, - dst->tiled, + dst->tiling, rect.x1 + px, rect.y2 - (py + h), w, h, @@ -301,9 +301,8 @@ do_blit_bitmap( GLcontext *ctx, } } } - out: - intel_batchbuffer_flush(intel->batch); } +out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 45f72bac52..3093ccf7c6 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -229,7 +229,7 @@ do_texture_copypixels(GLcontext * ctx, out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -337,18 +337,16 @@ do_blit_copypixels(GLcontext * ctx, continue; intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiled, - dst->pitch, dst->buffer, 0, dst->tiled, + src->pitch, src->buffer, 0, src->tiling, + dst->pitch, dst->buffer, 0, dst->tiling, clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ clip_x, clip_y, /* dstx, dsty */ clip_w, clip_h, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } - - out: - intel_batchbuffer_flush(intel->batch); } +out: UNLOCK_HARDWARE(intel); DBG("%s: success\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 34813d2aa0..5675084da5 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -81,7 +81,8 @@ do_texture_drawpixels(GLcontext * ctx, else { /* PBO only for now: */ -/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } @@ -180,7 +181,7 @@ do_texture_drawpixels(GLcontext * ctx, srcx, srcx + width, srcy + height, srcy); out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); return GL_TRUE; @@ -218,7 +219,6 @@ do_blit_drawpixels(GLcontext * ctx, struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; - dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -314,7 +314,7 @@ do_blit_drawpixels(GLcontext * ctx, intelEmitCopyBlit(intel, dest->cpp, rowLength, src_buffer, src_offset, GL_FALSE, - dest->pitch, dest->buffer, 0, dest->tiled, + dest->pitch, dest->buffer, 0, dest->tiling, rect.x1 - dest_rect.x1, rect.y2 - dest_rect.y2, rect.x1, @@ -322,17 +322,9 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } - intel_batchbuffer_flush(intel->batch); - fence = intel->batch->last_fence; - dri_fence_reference(fence); } UNLOCK_HARDWARE(intel); - if (fence) { - dri_fence_wait(fence); - dri_fence_unreference(fence); - } - if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 37629c07e2..96af7e1a03 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -31,11 +31,140 @@ #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) +#define FLUSH_MAP_CACHE (1 << 0) +#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) + /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) #define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +/** @{ + * 915 definitions + */ +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) +/** @} */ + +/** @{ + * 830 definitions + */ +#define S0_VB_OFFSET_MASK_830 0xffffff8 +#define S0_VB_PITCH_SHIFT_830 1 +#define S0_VB_ENABLE_830 0 +/** @} */ + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) +#define S2_TEX_COUNT_SHIFT_830 12 +#define S2_VERTEX_0_WIDTH_SHIFT_830 0 +#define S2_VERTEX_1_WIDTH_SHIFT_830 6 +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 35ab46afea..ddfdce3835 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -39,13 +39,17 @@ * last moment. */ +#include +#include + #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" #include "intel_batchbuffer.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -76,10 +80,34 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } } +static int +intel_set_region_tiling_gem(struct intel_context *intel, + struct intel_region *region, + uint32_t bo_handle) +{ + struct drm_i915_gem_get_tiling get_tiling; + int ret; + + memset(&get_tiling, 0, sizeof(get_tiling)); + + get_tiling.handle = bo_handle; + ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); + if (ret != 0) { + fprintf(stderr, "Failed to get tiling state for region: %s\n", + strerror(errno)); + return ret; + } + + region->tiling = get_tiling.tiling_mode; + region->bit_6_swizzle = get_tiling.swizzle_mode; + + return 0; +} + static struct intel_region * intel_region_alloc_internal(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, dri_bo *buffer) + dri_bo *buffer) { struct intel_region *region; @@ -93,9 +121,12 @@ intel_region_alloc_internal(struct intel_context *intel, region->pitch = pitch; region->height = height; /* needed? */ region->refcount = 1; - region->tiled = tiled; region->buffer = buffer; + /* Default to no tiling */ + region->tiling = I915_TILING_NONE; + region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + return region; } @@ -106,25 +137,28 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + pitch * cpp * height, 64); - return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); + return intel_region_alloc_internal(intel, cpp, pitch, height, buffer); } struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, GLuint handle) + GLuint handle) { + struct intel_region *region; dri_bo *buffer; - buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); + buffer = intel_bo_gem_create_from_name(intel->bufmgr, "dri2 region", handle); - return intel_region_alloc_internal(intel, - cpp, pitch, height, tiled, buffer); + region = intel_region_alloc_internal(intel, cpp, pitch, height, buffer); + if (region == NULL) + return region; + + intel_set_region_tiling_gem(intel, region, handle); + + return region; } void @@ -138,26 +172,34 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) } void -intel_region_release(struct intel_region **region) +intel_region_release(struct intel_region **region_handle) { - if (!*region) + struct intel_region *region = *region_handle; + + if (region == NULL) return; - DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1); + DBG("%s %d\n", __FUNCTION__, region->refcount - 1); + + ASSERT(region->refcount > 0); + region->refcount--; - ASSERT((*region)->refcount > 0); - (*region)->refcount--; + if (region->refcount == 0) { + assert(region->map_refcount == 0); - if ((*region)->refcount == 0) { - assert((*region)->map_refcount == 0); + if (region->pbo) + region->pbo->region = NULL; + region->pbo = NULL; + dri_bo_unreference(region->buffer); + + if (region->classic_map != NULL) { + drmUnmap(region->classic_map, + region->pitch * region->cpp * region->height); + } - if ((*region)->pbo) - (*region)->pbo->region = NULL; - (*region)->pbo = NULL; - dri_bo_unreference((*region)->buffer); - free(*region); + free(region); } - *region = NULL; + *region_handle = NULL; } /* @@ -272,8 +314,8 @@ intel_region_copy(struct intel_context *intel, intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, src_offset, src->tiled, - dst->pitch, dst->buffer, dst_offset, dst->tiled, + src->pitch, src->buffer, src_offset, src->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiling, srcx, srcy, dstx, dsty, width, height, GL_COPY); } @@ -303,7 +345,7 @@ intel_region_fill(struct intel_context *intel, intelEmitFillBlit(intel, dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiled, + dst->pitch, dst->buffer, dst_offset, dst->tiling, dstx, dsty, width, height, color); } @@ -355,10 +397,7 @@ intel_region_release_pbo(struct intel_context *intel, region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, - 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + 64); } /* Break the COW tie to the pbo. Both the pbo and the region end up @@ -382,23 +421,19 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) /* Now blit from the texture buffer to the new buffer: */ - intel_batchbuffer_flush(intel->batch); - was_locked = intel->locked; - if (intel->locked) + if (!was_locked) LOCK_HARDWARE(intel); intelEmitCopyBlit(intel, region->cpp, - region->pitch, region->buffer, 0, region->tiled, - region->pitch, pbo->buffer, 0, region->tiled, + region->pitch, region->buffer, 0, region->tiling, + region->pitch, pbo->buffer, 0, region->tiling, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); - intel_batchbuffer_flush(intel->batch); - - if (was_locked) + if (!was_locked) UNLOCK_HARDWARE(intel); } @@ -423,6 +458,7 @@ intel_recreate_static(struct intel_context *intel, intelRegion *region_desc) { intelScreenPrivate *intelScreen = intel->intelScreen; + int ret; if (region == NULL) { region = calloc(sizeof(*region), 1); @@ -435,21 +471,45 @@ intel_recreate_static(struct intel_context *intel, region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; region->height = intelScreen->height; /* needed? */ - region->tiled = region_desc->tiled; if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, - name, - region_desc->bo_handle); + region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, + name, + region_desc->bo_handle); + + intel_set_region_tiling_gem(intel, region, region_desc->bo_handle); } else { - region->buffer = dri_bo_alloc_static(intel->bufmgr, - name, - region_desc->offset, - intelScreen->pitch * - intelScreen->height, - region_desc->map, - DRM_BO_FLAG_MEM_TT); + ret = drmMap(intel->driFd, region_desc->handle, + region->pitch * region->cpp * region->height, + ®ion->classic_map); + if (ret != 0) { + fprintf(stderr, "Failed to drmMap %s buffer\n", name); + free(region); + return NULL; + } + + region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, + name, + region_desc->offset, + region->pitch * region->cpp * + region->height, + region->classic_map); + + /* The sarea just gives us a boolean for whether it's tiled or not, + * instead of which tiling mode it is. Guess. + */ + if (region_desc->tiled) { + if (IS_965(intel->intelScreen->deviceID) && + region_desc == &intelScreen->depth) + region->tiling = I915_TILING_Y; + else + region->tiling = I915_TILING_X; + } else { + region->tiling = I915_TILING_NONE; + } + + region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 229f79aeba..e5f19fbb45 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -28,6 +28,12 @@ #ifndef INTEL_REGIONS_H #define INTEL_REGIONS_H +/** @file intel_regions.h + * + * Structure definitions and prototypes for intel_region handling, which is + * the basic structure for rectangular collections of pixels stored in a dri_bo. + */ + #include "mtypes.h" #include "dri_bufmgr.h" @@ -53,8 +59,9 @@ struct intel_region GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ - GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */ - + uint32_t tiling; /**< Which tiling mode the region is in */ + uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ + drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ struct intel_buffer_object *pbo; /* zero-copy uploads */ }; @@ -69,7 +76,7 @@ struct intel_region *intel_region_alloc(struct intel_context *intel, struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, unsigned int handle); + unsigned int handle); void intel_region_reference(struct intel_region **dst, struct intel_region *src); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 5dded4b167..9ed89906d5 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] = /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL */ - DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") DRI_CONF_ENUM(0, "Disable buffer object reuse") DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") @@ -90,51 +90,6 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) { intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; - if (intelScreen->front.handle) { - if (drmMap(sPriv->fd, - intelScreen->front.handle, - intelScreen->front.size, - (drmAddress *) & intelScreen->front.map) != 0) { - _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); - return GL_FALSE; - } - } - else { - _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); - } - - if (0) - _mesa_printf("Back 0x%08x ", intelScreen->back.handle); - if (drmMap(sPriv->fd, - intelScreen->back.handle, - intelScreen->back.size, - (drmAddress *) & intelScreen->back.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (intelScreen->third.handle) { - if (0) - _mesa_printf("Third 0x%08x ", intelScreen->third.handle); - if (drmMap(sPriv->fd, - intelScreen->third.handle, - intelScreen->third.size, - (drmAddress *) & intelScreen->third.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - } - - if (0) - _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle); - if (drmMap(sPriv->fd, - intelScreen->depth.handle, - intelScreen->depth.size, - (drmAddress *) & intelScreen->depth.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - if (0) _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); if (intelScreen->tex.size != 0) { @@ -147,50 +102,15 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) } } - if (0) - printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n", - intelScreen->front.map, - intelScreen->back.map, intelScreen->third.map, - intelScreen->depth.map, intelScreen->tex.map); return GL_TRUE; } void intelUnmapScreenRegions(intelScreenPrivate * intelScreen) { -#define REALLY_UNMAP 1 - if (intelScreen->front.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) - printf("drmUnmap front failed!\n"); -#endif - intelScreen->front.map = NULL; - } - if (intelScreen->back.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) - printf("drmUnmap back failed!\n"); -#endif - intelScreen->back.map = NULL; - } - if (intelScreen->third.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0) - printf("drmUnmap third failed!\n"); -#endif - intelScreen->third.map = NULL; - } - if (intelScreen->depth.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->depth.map, intelScreen->depth.size); - intelScreen->depth.map = NULL; -#endif - } if (intelScreen->tex.map) { -#if REALLY_UNMAP drmUnmap(intelScreen->tex.map, intelScreen->tex.size); intelScreen->tex.map = NULL; -#endif } } @@ -221,16 +141,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea) sarea->height); fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); + (unsigned) sarea->front_handle, sarea->front_tiled); fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + (unsigned) sarea->back_handle, sarea->back_tiled); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); + (unsigned) sarea->depth_handle, sarea->depth_tiled); fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); } @@ -334,8 +254,6 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, * attached. */ } -#define BUFFER_FLAG_TILED 0x0100 - /** * DRI2 entrypoint */ @@ -348,7 +266,6 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, struct intel_renderbuffer *rb; struct intel_region *region; struct intel_context *intel = pcp->driverPrivate; - GLuint tiled; switch (ba->buffer.attachment) { case DRI_DRAWABLE_BUFFER_FRONT_LEFT: @@ -382,10 +299,9 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, return; #endif - tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0; region = intel_region_alloc_for_handle(intel, ba->buffer.cpp, ba->buffer.pitch / ba->buffer.cpp, - dPriv->h, tiled, + dPriv->h, ba->buffer.handle); intel_renderbuffer_set_region(rb, region); @@ -530,14 +446,13 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); /* setup the hardware-based renderbuffers */ - { - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, - &intel_fb->color_rb[0]->Base); - } + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, + &intel_fb->color_rb[0]->Base); if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); @@ -569,7 +484,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index e62b2d7c89..9a73b13951 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -74,6 +74,8 @@ typedef struct int irq_active; int allow_batchbuffer; + int ttm; + /** * Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 742b1b8735..edede3a74b 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,6 +39,224 @@ #include "swrast/swrast.h" +static void +intel_set_span_functions(struct intel_context *intel, + struct gl_renderbuffer *rb); + +#define SPAN_CACHE_SIZE 4096 + +static void +get_span_cache(struct intel_renderbuffer *irb, uint32_t offset) +{ + if (irb->span_cache == NULL) { + irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); + irb->span_cache_offset = -1; + } + + if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) { + irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1); + dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset, + SPAN_CACHE_SIZE, irb->span_cache); + } +} + +static void +clear_span_cache(struct intel_renderbuffer *irb) +{ + irb->span_cache_offset = -1; +} + +static uint32_t +pread_32(struct intel_renderbuffer *irb, uint32_t offset) +{ + get_span_cache(irb, offset); + + return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); +} + +static uint32_t +pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset) +{ + get_span_cache(irb, offset); + + return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) | + 0xff000000; +} + +static uint16_t +pread_16(struct intel_renderbuffer *irb, uint32_t offset) +{ + get_span_cache(irb, offset); + + return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); +} + +static uint8_t +pread_8(struct intel_renderbuffer *irb, uint32_t offset) +{ + get_span_cache(irb, offset); + + return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); +} + +static void +pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) +{ + clear_span_cache(irb); + + dri_bo_subdata(irb->region->buffer, offset, 4, &val); +} + +static void +pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) +{ + clear_span_cache(irb); + + dri_bo_subdata(irb->region->buffer, offset, 3, &val); +} + +static void +pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) +{ + clear_span_cache(irb); + + dri_bo_subdata(irb->region->buffer, offset, 2, &val); +} + +static void +pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) +{ + clear_span_cache(irb); + + dri_bo_subdata(irb->region->buffer, offset, 1, &val); +} + +static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, + int x, int y) +{ + x += intel->drawX; + y += intel->drawY; + + return (y * irb->region->pitch + x) * irb->region->cpp; +} + +/* + * Deal with tiled surfaces + */ + +static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, + int x, int y) +{ + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x1ff; + y_tile_off = y & 7; + + x_tile_number = xbyte >> 9; + y_tile_number = y >> 3; + + tile_off = (y_tile_off << 9) + x_tile_off; + + switch (irb->region->bit_6_swizzle) { + case I915_BIT_6_SWIZZLE_NONE: + break; + case I915_BIT_6_SWIZZLE_9: + tile_off ^= ((tile_off >> 3) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); + break; + case I915_BIT_6_SWIZZLE_9_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ + ((tile_off >> 5) & 64); + break; + default: + fprintf(stderr, "Unknown tile swizzling mode %d\n", + irb->region->bit_6_swizzle); + exit(1); + } + + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + +#if 0 + printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", + x, y, tile_off, tile_base, + tile_off + tile_base, + irb->pfPitch, tile_stride); +#endif + + return tile_base + tile_off; +} + +static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, + int x, int y) +{ + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 5; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x7f; + y_tile_off = y & 0x1f; + + x_tile_number = xbyte >> 7; + y_tile_number = y >> 5; + + tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + + (x_tile_off & 0xf); + + switch (irb->region->bit_6_swizzle) { + case I915_BIT_6_SWIZZLE_NONE: + break; + case I915_BIT_6_SWIZZLE_9: + tile_off ^= ((tile_off >> 3) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); + break; + case I915_BIT_6_SWIZZLE_9_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ + ((tile_off >> 5) & 64); + break; + default: + fprintf(stderr, "Unknown tile swizzling mode %d\n", + irb->region->bit_6_swizzle); + exit(1); + } + + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + + return tile_base + tile_off; +} + /* break intelWriteRGBASpan_ARGB8888 */ @@ -51,10 +269,7 @@ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ - GLubyte *buf = (GLubyte *) irb->pfMap \ - + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ - assert(irb->pfMap);\ (void) p; /* XXX FBO: this is identical to the macro in spantmp2.h except we get @@ -69,12 +284,14 @@ int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ int maxy = intel->pClipRects[_nc].y2 - intel->drawY; - - - + +#if 0 + }} +#endif #define Y_FLIP(_y) ((_y) * yScale + yBias) +/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -86,7 +303,8 @@ #define TAG(x) intel##x##_RGB565 #define TAG2(x,y) intel##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2) +#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -96,17 +314,89 @@ #define TAG(x) intel##x##_ARGB8888 #define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) +#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +/* 32 bit, xRGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel##x##_xRGB8888 +#define TAG2(x,y) intel##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +/* 16 bit RGB565 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_XTile_##x##_RGB565 +#define TAG2(x,y) intel_XTile_##x##_RGB565##y +#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_YTile_##x##_RGB565 +#define TAG2(x,y) intel_YTile_##x##_RGB565##y +#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +/* 32 bit ARGB888 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_XTile_##x##_ARGB8888 +#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y +#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_YTile_##x##_ARGB8888 +#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y +#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +/* 32 bit xRGB888 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_XTile_##x##_xRGB8888 +#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_YTile_##x##_xRGB8888 +#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ - char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ - (intel->drawY * pitch + intel->drawX) * irb->region->cpp; + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -115,15 +405,34 @@ ** 16-bit depthbuffer functions. **/ #define VALUE_TYPE GLushort +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y)) +#define TAG(x) intel##x##_z16 +#include "depthtmp.h" -#define WRITE_DEPTH( _x, _y, d ) \ - ((GLushort *)buf)[(_x) + (_y) * pitch] = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = ((GLushort *)buf)[(_x) + (_y) * pitch]; +/** + ** 16-bit x tile depthbuffer functions. + **/ +#define VALUE_TYPE GLushort +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y)) +#define TAG(x) intel_XTile_##x##_z16 +#include "depthtmp.h" -#define TAG(x) intel##x##_z16 +/** + ** 16-bit y tile depthbuffer functions. + **/ +#define VALUE_TYPE GLushort +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y)) +#define TAG(x) intel_YTile_##x##_z16 #include "depthtmp.h" @@ -136,14 +445,13 @@ #define VALUE_TYPE GLuint /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = ((d) >> 8) | ((d) << 24); \ - ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \ -} +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -152,22 +460,114 @@ /** - ** 8-bit stencil function (XXX FBO: This is obsolete) + ** 24/8-bit x-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. **/ -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ - tmp &= 0xffffff; \ - tmp |= ((d) << 24); \ - ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \ +#define VALUE_TYPE GLuint + +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) \ + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ } -#define READ_STENCIL( d, _x, _y ) \ - d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24; +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "depthtmp.h" + +/** + ** 24/8-bit y-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +#define VALUE_TYPE GLuint + +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "depthtmp.h" + + +/** + ** 8-bit stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d) + +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3); #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" +/** + ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d) +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3); + +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "stenciltmp.h" + +/** + ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d) + +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3) + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "stenciltmp.h" + +void +intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (irb == NULL || irb->region == NULL) + return; + + irb->pfPitch = irb->region->pitch; + + intel_set_span_functions(intel, rb); +} + +void +intel_renderbuffer_unmap(struct intel_context *intel, + struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (irb == NULL || irb->region == NULL) + return; + + clear_span_cache(irb); + irb->pfPitch = 0; + + rb->GetRow = NULL; + rb->PutRow = NULL; +} /** * Map or unmap all the renderbuffers which we may need during @@ -186,23 +586,13 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) { GLcontext *ctx = &intel->ctx; GLuint i, j; - struct intel_renderbuffer *irb; /* color draw buffers */ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; - irb = intel_renderbuffer(rb); - if (irb) { - /* this is a user-created intel_renderbuffer */ - if (irb->region) { - if (map) - intel_region_map(intel, irb->region); - else - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - } + if (map) + intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + else + intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); } /* check for render to textures */ @@ -225,77 +615,28 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } /* color read buffers */ - irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); - if (irb && irb->region) { - if (map) - intel_region_map(intel, irb->region); - else - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - - /* Account for front/back color page flipping. - * The span routines use the pfMap and pfPitch fields which will - * swap the front/back region map/pitch if we're page flipped. - * Do this after mapping, above, so the map field is valid. - */ -#if 0 - if (map && ctx->DrawBuffer->Name == 0) { - struct intel_renderbuffer *irbFront - = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT); - struct intel_renderbuffer *irbBack - = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT); - if (irbBack) { - /* double buffered */ - if (intel->sarea->pf_current_page == 0) { - irbFront->pfMap = irbFront->region->map; - irbFront->pfPitch = irbFront->region->pitch; - irbBack->pfMap = irbBack->region->map; - irbBack->pfPitch = irbBack->region->pitch; - } - else { - irbFront->pfMap = irbBack->region->map; - irbFront->pfPitch = irbBack->region->pitch; - irbBack->pfMap = irbFront->region->map; - irbBack->pfPitch = irbFront->region->pitch; - } - } - } -#endif + if (map) + intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer); + else + intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); /* depth buffer (Note wrapper!) */ if (ctx->DrawBuffer->_DepthBuffer) { - irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); - if (irb && irb->region) { - if (map) { - intel_region_map(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - else { - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - } + if (map) + intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); + else + intel_renderbuffer_unmap(intel, + ctx->DrawBuffer->_DepthBuffer->Wrapped); } /* stencil buffer (Note wrapper!) */ if (ctx->DrawBuffer->_StencilBuffer) { - irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); - if (irb && irb->region) { - if (map) { - intel_region_map(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - else { - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - } + if (map) + intel_renderbuffer_map(intel, + ctx->DrawBuffer->_StencilBuffer->Wrapped); + else + intel_renderbuffer_unmap(intel, + ctx->DrawBuffer->_StencilBuffer->Wrapped); } } @@ -313,18 +654,9 @@ intelSpanRenderStart(GLcontext * ctx) struct intel_context *intel = intel_context(ctx); GLuint i; - intelFinish(&intel->ctx); + intelFlush(&intel->ctx); LOCK_HARDWARE(intel); -#if 0 - /* Just map the framebuffer and all textures. Bufmgr code will - * take care of waiting on the necessary fences: - */ - intel_region_map(intel, intel->front_region); - intel_region_map(intel, intel->back_region); - intel_region_map(intel, intel->depth_region); -#endif - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -347,14 +679,6 @@ intelSpanRenderFinish(GLcontext * ctx) _swrast_flush(ctx); - /* Now unmap the framebuffer: - */ -#if 0 - intel_region_unmap(intel, intel->front_region); - intel_region_unmap(intel, intel->back_region); - intel_region_unmap(intel, intel->depth_region); -#endif - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -381,26 +705,108 @@ intelInitSpanFuncs(GLcontext * ctx) * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. */ -void -intel_set_span_functions(struct gl_renderbuffer *rb) +static void +intel_set_span_functions(struct intel_context *intel, + struct gl_renderbuffer *rb) { + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + uint32_t tiling; + + /* If in GEM mode, we need to do the tile address swizzling ourselves, + * instead of the fence registers handling it. + */ + if (intel->ttm) + tiling = irb->region->tiling; + else + tiling = I915_TILING_NONE; + if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ - intelInitPointers_RGB565(rb); + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_RGB565(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_RGB565(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_RGB565(rb); + break; + } + } + else if (rb->_ActualFormat == GL_RGB8) { + /* 8888 RGBx */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ - intelInitPointers_ARGB8888(rb); + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(rb); + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z16(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z16(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z16(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - intelInitDepthPointers_z24_s8(rb); + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ - intelInitStencilPointers_z24_s8(rb); + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } else { _mesa_problem(NULL, diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index 5201f6d6c6..acbeb4abe1 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -32,7 +32,9 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); - -extern void intel_set_span_functions(struct gl_renderbuffer *rb); +void intel_renderbuffer_map(struct intel_context *intel, + struct gl_renderbuffer *rb); +void intel_renderbuffer_unmap(struct intel_context *intel, + struct gl_renderbuffer *rb); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1add7c6188..d0ab464a1c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -144,15 +144,13 @@ do_copy_texsubimage(struct intel_context *intel, -src->pitch, src->buffer, src->height * src->pitch * src->cpp, - GL_FALSE, + src->tiling, intelImage->mt->pitch, intelImage->mt->region->buffer, image_offset, - intelImage->mt->region->tiled, + intelImage->mt->region->tiling, x, y + height, dstx, dsty, width, height, GL_COPY); /* ? */ - - intel_batchbuffer_flush(intel->batch); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index f261034c18..c11687a2cf 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -238,8 +238,6 @@ try_pbo_upload(struct intel_context *intel, dst_stride, dst_buffer, dst_offset, GL_FALSE, 0, 0, 0, 0, width, height, GL_COPY); - - intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -400,10 +398,25 @@ intelTexImage(GLcontext * ctx, intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); - } + } else if (intelImage->base.Border == 0) { + int comp_byte = 0; + + if (intelImage->base.IsCompressed) { + comp_byte = + intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); + } - if (!intelImage->mt) - DBG("XXX: Image did not fit into tree - storing in local memory!\n"); + /* Didn't fit in the object miptree, but it's suitable for inclusion in + * a miptree, so create one just for our level and store it in the image. + * It'll get moved into the object miptree at validate time. + */ + intelImage->mt = intel_miptree_create(intel, target, internalFormat, + level, level, + width, height, depth, + intelImage->base.TexFormat->TexelBytes, + comp_byte); + + } /* PBO fastpaths: */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 1b3aa89c05..5763f4ae1f 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -125,13 +125,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) struct intel_texture_object *intelObj = intel_texture_object(tObj); int comp_byte = 0; int cpp; - GLuint face, i; GLuint nr_faces = 0; struct intel_texture_image *firstImage; - GLboolean need_flush = GL_FALSE; - /* We know/require this is true by now: */ assert(intelObj->base._Complete); @@ -227,21 +224,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ if (intelObj->mt != intelImage->mt) { copy_image_data_to_tree(intel, intelObj, intelImage); - need_flush = GL_TRUE; } } } -#ifdef I915 - /* XXX: what is this flush about? - * On 965, it causes a batch flush in the middle of the state relocation - * emits, which means that the eventual rendering doesn't have all of the - * required relocations in place. - */ - if (need_flush) - intel_batchbuffer_flush(intel->batch); -#endif - return GL_TRUE; } -- cgit v1.2.3