From 70be48dff6bb68c61285641e4d976bfd53e0f00c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 Jan 2010 11:03:04 -0800 Subject: i965: Untested Sandybridge SF setup. --- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index b052b724d8..4daada205a 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -96,6 +96,17 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, intel_batchbuffer_flush(batch); } +static INLINE uint32_t float_as_int(float f) +{ + union { + float f; + uint32_t d; + } fi; + + fi.f = f; + return fi.d; +} + /* Here are the crusty old macros, to be removed: */ #define BATCH_LOCALS @@ -108,6 +119,8 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, } while (0) #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) +#define OUT_BATCH_F(f) intel_batchbuffer_emit_dword(intel->batch, \ + float_as_int(f)) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((unsigned) (delta) < buf->size); \ -- cgit v1.2.3 From 06d1472ffa0648efa9374fa227894fbf0b0be054 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Mar 2010 18:04:40 -0800 Subject: i915: Tell the kernel when we actually need fence registers on our BOs. This improves tiled texture performance of OA on my 945 from 25.3fps to 29.0fps, whereas untiled is 28.2fps, by avoiding stalls for fence register changes. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 25 +++++++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_batchbuffer.h | 10 ++++++++++ src/mesa/drivers/dri/intel/intel_blit.c | 24 ++++++++++++------------ src/mesa/drivers/dri/intel/intel_screen.c | 2 ++ 4 files changed, 49 insertions(+), 12 deletions(-) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index e38f10ebc6..a7bfd62b28 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -226,6 +226,31 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, return GL_TRUE; } +GLboolean +intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, + drm_intel_bo *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) + printf ("bad relocation ptr %p map %p offset %d size %lu\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map, + buffer, delta, + read_domains, write_domain); + + /* + * Using the old buffer offset, write in what the right data would + * be, in case the buffer doesn't move and we can short-circuit the + * relocation processing in the kernel + */ + intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + void intel_batchbuffer_data(struct intel_batchbuffer *batch, const void *data, GLuint bytes) diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 4daada205a..79bdbc17ae 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -64,6 +64,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); /* Inline functions - might actually be better off with these @@ -127,6 +132,11 @@ static INLINE uint32_t float_as_int(float f) intel_batchbuffer_emit_reloc(intel->batch, buf, \ read_domains, write_domain, delta); \ } while (0) +#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + intel_batchbuffer_emit_reloc_fenced(intel->batch, buf, \ + read_domains, write_domain, delta); \ +} while (0) #define ADVANCE_BATCH() do { \ unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 1d099e7c47..6d6af86347 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -188,14 +188,14 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH((uint16_t)src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + OUT_RELOC_FENCED(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, + src_offset); ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -365,9 +365,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); OUT_BATCH((y2 << 16) | x2); - OUT_RELOC(write_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC_FENCED(write_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); OUT_BATCH(clear_val); ADVANCE_BATCH(); @@ -448,9 +448,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index bc394d048e..a42af71104 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -426,6 +426,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen) else intelScreen->kernel_exec_fencing = GL_FALSE; + drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr); + intelScreen->named_regions = _mesa_NewHashTable(); return GL_TRUE; -- cgit v1.2.3 From 8a9d8bd52af4414e59320c68238b8929edf3ff80 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Mar 2010 12:27:46 -0800 Subject: intel: Check that the batch is mapped per BEGIN, not each OUT. Shaves 800 bytes off the driver. --- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 79bdbc17ae..860aa1327a 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -86,7 +86,6 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch) static INLINE void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) { - assert(batch->map); assert(intel_batchbuffer_space(batch) >= 4); *(GLuint *) (batch->ptr) = dword; batch->ptr += 4; @@ -118,6 +117,7 @@ static INLINE uint32_t float_as_int(float f) #define BEGIN_BATCH(n) do { \ intel_batchbuffer_require_space(intel->batch, (n)*4); \ + assert(intel->batch->map); \ assert(intel->batch->emit.start_ptr == NULL); \ intel->batch->emit.total = (n) * 4; \ intel->batch->emit.start_ptr = intel->batch->ptr; \ -- cgit v1.2.3 From 342a7f23bf76e21b049cba9ab97bf4aa640a5bfd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Mar 2010 12:40:45 -0800 Subject: intel: Move the assertions about reloc delta from the macros to the function. Cuts another 1800 bytes from the driver. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 4 ++++ src/mesa/drivers/dri/intel/intel_batchbuffer.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a7bfd62b28..9768b0deee 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -210,6 +210,8 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, { int ret; + assert(delta < buffer->size); + if (batch->ptr - batch->map > batch->buf->size) printf ("bad relocation ptr %p map %p offset %d size %lu\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); @@ -234,6 +236,8 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, { int ret; + assert(delta < buffer->size); + if (batch->ptr - batch->map > batch->buf->size) printf ("bad relocation ptr %p map %p offset %d size %lu\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 860aa1327a..b7625b5b0d 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -128,12 +128,10 @@ static INLINE uint32_t float_as_int(float f) float_as_int(f)) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ intel_batchbuffer_emit_reloc(intel->batch, buf, \ read_domains, write_domain, delta); \ } while (0) #define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ intel_batchbuffer_emit_reloc_fenced(intel->batch, buf, \ read_domains, write_domain, delta); \ } while (0) -- cgit v1.2.3 From 7392002041f6c7ac6eb788d7b154f2b44eb6f403 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Mar 2010 12:47:21 -0800 Subject: intel: Replace batch macro contents with function calls. This manages to cut down another 3800 bytes. --- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 67 +++++++++++++++----------- 1 file changed, 39 insertions(+), 28 deletions(-) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index b7625b5b0d..5e0ea916b4 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -71,6 +71,17 @@ GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); +static INLINE uint32_t float_as_int(float f) +{ + union { + float f; + uint32_t d; + } fi; + + fi.f = f; + return fi.d; +} + /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of @@ -91,6 +102,12 @@ intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) batch->ptr += 4; } +static INLINE void +intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) +{ + intel_batchbuffer_emit_dword(batch, float_as_int(f)); +} + static INLINE void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, GLuint sz) @@ -100,33 +117,36 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, intel_batchbuffer_flush(batch); } -static INLINE uint32_t float_as_int(float f) +static INLINE void +intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) { - union { - float f; - uint32_t d; - } fi; + intel_batchbuffer_require_space(batch, n * 4); + assert(batch->map); + assert(batch->emit.start_ptr == NULL); + batch->emit.total = n * 4; + batch->emit.start_ptr = batch->ptr; +} - fi.f = f; - return fi.d; +static INLINE void +intel_batchbuffer_advance(struct intel_batchbuffer *batch) +{ + unsigned int _n = batch->ptr - batch->emit.start_ptr; + assert(batch->emit.start_ptr != NULL); + if (_n != batch->emit.total) { + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", + _n, batch->emit.total); + abort(); + } + batch->emit.start_ptr = NULL; } /* Here are the crusty old macros, to be removed: */ #define BATCH_LOCALS -#define BEGIN_BATCH(n) do { \ - intel_batchbuffer_require_space(intel->batch, (n)*4); \ - assert(intel->batch->map); \ - assert(intel->batch->emit.start_ptr == NULL); \ - intel->batch->emit.total = (n) * 4; \ - intel->batch->emit.start_ptr = intel->batch->ptr; \ -} while (0) - +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_BATCH_F(f) intel_batchbuffer_emit_dword(intel->batch, \ - float_as_int(f)) - +#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ intel_batchbuffer_emit_reloc(intel->batch, buf, \ read_domains, write_domain, delta); \ @@ -136,15 +156,6 @@ static INLINE uint32_t float_as_int(float f) read_domains, write_domain, delta); \ } while (0) -#define ADVANCE_BATCH() do { \ - unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ - assert(intel->batch->emit.start_ptr != NULL); \ - if (_n != intel->batch->emit.total) { \ - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ - _n, intel->batch->emit.total); \ - abort(); \ - } \ - intel->batch->emit.start_ptr = NULL; \ -} while(0) +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch); #endif -- cgit v1.2.3 From 855515859ec1d94737ea91167220ba7b568c144d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Mar 2010 12:56:40 -0800 Subject: intel: Only do batchbuffer debug if --enable-debug is used. This saves 6.6KB on the 965 driver, and appears to speed firefox-talos-gfx up by 1-2%. Unlike many other asserts in the driver, when we make a mistake that would trigger one of these it generally shows up all the time for developers, so turning it off for release seems fine. --- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa/drivers/dri/intel/intel_batchbuffer.h') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 5e0ea916b4..e5ad2617ab 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -24,11 +24,13 @@ struct intel_batchbuffer GLuint size; +#ifdef DEBUG /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ struct { GLuint total; GLubyte *start_ptr; } emit; +#endif GLuint dirty_state; GLuint reserved_space; @@ -97,7 +99,9 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch) static INLINE void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) { +#ifdef DEBUG assert(intel_batchbuffer_space(batch) >= 4); +#endif *(GLuint *) (batch->ptr) = dword; batch->ptr += 4; } @@ -112,7 +116,9 @@ static INLINE void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, GLuint sz) { +#ifdef DEBUG assert(sz < batch->size - 8); +#endif if (intel_batchbuffer_space(batch) < sz) intel_batchbuffer_flush(batch); } @@ -121,15 +127,18 @@ static INLINE void intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) { intel_batchbuffer_require_space(batch, n * 4); +#ifdef DEBUG assert(batch->map); assert(batch->emit.start_ptr == NULL); batch->emit.total = n * 4; batch->emit.start_ptr = batch->ptr; +#endif } static INLINE void intel_batchbuffer_advance(struct intel_batchbuffer *batch) { +#ifdef DEBUG unsigned int _n = batch->ptr - batch->emit.start_ptr; assert(batch->emit.start_ptr != NULL); if (_n != batch->emit.total) { @@ -138,6 +147,7 @@ intel_batchbuffer_advance(struct intel_batchbuffer *batch) abort(); } batch->emit.start_ptr = NULL; +#endif } /* Here are the crusty old macros, to be removed: -- cgit v1.2.3