diff options
| -rw-r--r-- | src/mesa/drivers/dri/i915/i830_vtbl.c | 4 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i915/i915_vtbl.c | 3 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i915/intel_tris.c | 56 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i915/intel_tris.h | 3 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/intel/intel_batchbuffer.c | 3 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/intel/intel_context.c | 1 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/intel/intel_context.h | 9 | 
7 files changed, 48 insertions, 31 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 4d3ad0083a..0af5ed0b50 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -677,9 +677,6 @@ i830_new_batch(struct intel_context *intel)     struct i830_context *i830 = i830_context(&intel->ctx);     i830->state.emitted = 0; -   /* Signal that we should put new vertices into a new vertex buffer. */ -   intel->prim.needs_new_vb = GL_TRUE; -     /* Check that we didn't just wrap our batchbuffer at a bad time. */     assert(!intel->no_batch_wrap);  } @@ -722,4 +719,5 @@ i830InitVtbl(struct i830_context *i830)     i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;     i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;     i830->intel.vtbl.note_unlock = i830_note_unlock;  +   i830->intel.vtbl.finish_batch = intel_finish_vb;  } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 23d63fb47a..27dfc2b890 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -589,8 +589,6 @@ i915_new_batch(struct intel_context *intel)      * difficulties associated with them (physical address requirements).      */     i915->state.emitted = 0; -   /* Signal that we should put new vertices into a new vertex buffer. */ -   intel->prim.needs_new_vb = GL_TRUE;     /* Check that we didn't just wrap our batchbuffer at a bad time. */     assert(!intel->no_batch_wrap); @@ -633,4 +631,5 @@ i915InitVtbl(struct i915_context *i915)     i915->intel.vtbl.flush_cmd = i915_flush_cmd;     i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;     i915->intel.vtbl.note_unlock = i915_note_unlock;  +   i915->intel.vtbl.finish_batch = intel_finish_vb;  } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a1121925cb..8714dd15f3 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -77,31 +77,28 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)     /* Check for space in the existing VB */     if (intel->prim.vb_bo == NULL || -       intel->prim.needs_new_vb ||         (intel->prim.current_offset +  	count * intel->vertex_size * 4) > INTEL_VB_SIZE ||         (intel->prim.count + count) >= (1 << 16)) {        /* Flush existing prim if any */        INTEL_FIREVERTICES(intel); +      intel_finish_vb(intel); +        /* Start a new VB */ -      dri_bo_unreference(intel->prim.vb_bo); +      if (intel->prim.vb == NULL) +	 intel->prim.vb = malloc(INTEL_VB_SIZE);        intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb",  				       INTEL_VB_SIZE, 4);        intel->prim.start_offset = 0;        intel->prim.current_offset = 0;        dri_bufmgr_check_aperture_space(intel->prim.vb_bo); - -      intel->prim.needs_new_vb = GL_FALSE; - -      dri_bo_map(intel->prim.vb_bo, GL_TRUE);     }     intel->prim.flush = intel_flush_prim; -   addr = (uint32_t *)((char *)intel->prim.vb_bo->virtual + -		       intel->prim.current_offset); +   addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);     intel->prim.current_offset += intel->vertex_size * 4 * count;     intel->prim.count += count; @@ -112,6 +109,7 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)  void intel_flush_prim(struct intel_context *intel)  {     BATCH_LOCALS; +   dri_bo *vb_bo;     /* Must be called after an intel_start_prim. */     assert(intel->prim.primitive != ~0); @@ -119,9 +117,13 @@ void intel_flush_prim(struct intel_context *intel)     if (intel->prim.count == 0)        return; -   intel_wait_flips(intel); +   /* Keep a reference on the BO as it may get finished as we start the +    * batch emit. +    */ +   vb_bo = intel->prim.vb_bo; +   dri_bo_reference(vb_bo); -   dri_bo_unmap(intel->prim.vb_bo); +   intel_wait_flips(intel);     intel->vtbl.emit_state(intel); @@ -147,7 +149,7 @@ void intel_flush_prim(struct intel_context *intel)        OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |  		I1_LOAD_S(0) | I1_LOAD_S(1) | 1);        assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK) == 0); -      OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, +      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,  		intel->prim.start_offset);        OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |  		(intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); @@ -167,7 +169,7 @@ void intel_flush_prim(struct intel_context *intel)  		I1_LOAD_S(0) | I1_LOAD_S(2) | 1);        /* S0 */        assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK_830) == 0); -      OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, +      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,  		intel->prim.start_offset |  		(intel->vertex_size << S0_VB_PITCH_SHIFT_830) |  		S0_VB_ENABLE_830); @@ -193,17 +195,35 @@ void intel_flush_prim(struct intel_context *intel)     intel->no_batch_wrap = GL_FALSE; -   /* If we're going to keep using this VB for more primitives, map it -    * again. -    */ -   if (!intel->prim.needs_new_vb) -      dri_bo_map(intel->prim.vb_bo, GL_TRUE); -     intel->prim.flush = NULL;     intel->prim.start_offset = intel->prim.current_offset;     if (!IS_9XX(intel->intelScreen->deviceID))        intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);     intel->prim.count = 0; + +   dri_bo_unreference(vb_bo); +} + +/** + * Uploads the locally-accumulated VB into the buffer object. + * + * This avoids us thrashing the cachelines in and out as the buffer gets + * filled, dispatched, then reused as the hardware completes rendering from it, + * and also lets us clflush less if we dispatch with a partially-filled VB. + * + * This is called normally from get_space when we're finishing a BO, but also + * at batch flush time so that we don't try accessing the contents of a + * just-dispatched buffer. + */ +void intel_finish_vb(struct intel_context *intel) +{ +   if (intel->prim.vb_bo == NULL) +      return; + +   dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset, +		  intel->prim.vb); +   dri_bo_unreference(intel->prim.vb_bo); +   intel->prim.vb_bo = NULL;  }  /*********************************************************************** diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h index 6b38cd6fbd..0e08986221 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.h +++ b/src/mesa/drivers/dri/i915/intel_tris.h @@ -30,7 +30,7 @@  #include "mtypes.h" -#define INTEL_VB_SIZE		(8 * 1024) +#define INTEL_VB_SIZE		(32 * 1024)  /** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */  #define INTEL_PRIM_EMIT_SIZE	(5 * 4) @@ -49,5 +49,6 @@ extern void intelChooseRenderState(GLcontext * ctx);  void intel_set_prim(struct intel_context *intel, uint32_t prim);  GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count);  void intel_flush_prim(struct intel_context *intel); +void intel_finish_vb(struct intel_context *intel);  #endif diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 019880581a..9ad9f6a6c0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -250,6 +250,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,      * avoid that in the first place. */     batch->ptr = batch->map; +   if (intel->vtbl.finish_batch) +      intel->vtbl.finish_batch(intel); +     /* TODO: Just pass the relocation list and dma buffer up to the      * kernel.      */ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index b9e1eae982..16ddbeea9e 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -727,6 +727,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)        intel->Fallback = 0;      /* don't call _swrast_Flush later */        intel_batchbuffer_free(intel->batch); +      free(intel->prim.vb);        if (release_texture_heaps) {           /* This share group is about to go away, free our private diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 1aa9c3d711..c314b6e218 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -86,6 +86,7 @@ struct intel_context     {        void (*destroy) (struct intel_context * intel);        void (*emit_state) (struct intel_context * intel); +      void (*finish_batch) (struct intel_context * intel);        void (*new_batch) (struct intel_context * intel);        void (*emit_invarient_state) (struct intel_context * intel);        void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -185,16 +186,10 @@ struct intel_context        uint32_t primitive;	/**< Current hardware primitive type */        void (*flush) (struct intel_context *);        dri_bo *vb_bo; +      uint8_t *vb;        unsigned int start_offset; /**< Byte offset of primitive sequence */        unsigned int current_offset; /**< Byte offset of next vertex */        unsigned int count;	/**< Number of vertices in current primitive */ -      /** -       * Signals when a new VB should be started, regardless of remaining -       * space. -       * -       * Used to avoid rewriting a VB that's being rendered from. -       */ -      GLboolean needs_new_vb;     } prim;     GLuint stats_wm;  | 
