From 8d68a90e225d831a395ba788e425cb717eec1f9a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Feb 2011 20:25:51 +0000 Subject: intel: use pwrite for batch It's faster. Not only is the memcpy more efficiently performed in the kernel (making up for the system call overhead), but by not using mmap we remove the greater overhead of tracking the vma of every batch. And it means we can read back from the batch buffer without incurring the cost of an uncached read through the GTT. Signed-off-by: Chris Wilson --- src/mesa/drivers/dri/i915/i830_vtbl.c | 10 ++++------ src/mesa/drivers/dri/i915/i915_vtbl.c | 10 ++++------ src/mesa/drivers/dri/i915/intel_render.c | 2 +- src/mesa/drivers/dri/i915/intel_tris.c | 32 ++++++++++++-------------------- 4 files changed, 21 insertions(+), 33 deletions(-) (limited to 'src/mesa/drivers/dri/i915') diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index ebdefeac87..19f0807759 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel) #define emit( intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size, false) + intel_batchbuffer_data(intel, state, size, false) static GLuint get_dirty(struct i830_hw_state *state) @@ -428,7 +428,7 @@ i830_emit_state(struct intel_context *intel) * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. 
*/ - intel_batchbuffer_require_space(intel->batch, + intel_batchbuffer_require_space(intel, get_state_size(state) + INTEL_PRIM_EMIT_SIZE, false); count = 0; @@ -436,7 +436,7 @@ i830_emit_state(struct intel_context *intel) aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + aper_array[aper_count++] = intel->batch.bo; if (dirty & I830_UPLOAD_BUFFERS) { aper_array[aper_count++] = state->draw_region->buffer; if (state->depth_region) @@ -453,7 +453,7 @@ i830_emit_state(struct intel_context *intel) if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); goto again; } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state"); @@ -556,9 +556,7 @@ i830_emit_state(struct intel_context *intel) } } - intel->batch->dirty_state &= ~dirty; assert(get_dirty(state) == 0); - assert((intel->batch->dirty_state & (1<<1)) == 0); } static void diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index a94b957127..394935c96c 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel) #define emit(intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size, false) + intel_batchbuffer_data(intel, state, size, false) static GLuint get_dirty(struct i915_hw_state *state) @@ -299,7 +299,7 @@ i915_emit_state(struct intel_context *intel) * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. 
*/ - intel_batchbuffer_require_space(intel->batch, + intel_batchbuffer_require_space(intel, get_state_size(state) + INTEL_PRIM_EMIT_SIZE, false); count = 0; @@ -307,7 +307,7 @@ i915_emit_state(struct intel_context *intel) aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + aper_array[aper_count++] = intel->batch.bo; if (dirty & I915_UPLOAD_BUFFERS) { aper_array[aper_count++] = state->draw_region->buffer; if (state->depth_region) @@ -327,7 +327,7 @@ i915_emit_state(struct intel_context *intel) if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); goto again; } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state"); @@ -476,9 +476,7 @@ i915_emit_state(struct intel_context *intel) } } - intel->batch->dirty_state &= ~dirty; assert(get_dirty(state) == 0); - assert((intel->batch->dirty_state & (1<<1)) == 0); } static void diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 0d8ab4b507..2d361ca0a9 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -124,7 +124,7 @@ static INLINE GLuint intel_get_vb_max(struct intel_context *intel) GLuint ret; if (intel->intelScreen->no_vbo) - ret = intel->batch->size - 1500; + ret = sizeof(intel->batch.map) - 1500; else ret = INTEL_VB_SIZE; ret /= (intel->vertex_size * 4); diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index b9a8aeb12f..c6b5a01885 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -62,22 +62,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, static void intel_flush_inline_primitive(struct intel_context *intel) { - GLuint used = intel->batch->ptr - intel->prim.start_ptr; + GLuint used = intel->batch.used - intel->prim.start_ptr; 
assert(intel->prim.primitive != ~0); /* printf("/\n"); */ - if (used < 8) + if (used < 2) goto do_discard; - *(int *) intel->prim.start_ptr = (_3DPRIMITIVE | - intel->prim.primitive | (used / 4 - 2)); + intel->batch.map[intel->prim.start_ptr] = + _3DPRIMITIVE | intel->prim.primitive | (used - 2); goto finished; do_discard: - intel->batch->ptr -= used; + intel->batch.used = intel->prim.start_ptr; finished: intel->prim.primitive = ~0; @@ -100,9 +100,7 @@ static void intel_start_inline(struct intel_context *intel, uint32_t prim) */ BEGIN_BATCH(1); - assert((intel->batch->dirty_state & (1<<1)) == 0); - - intel->prim.start_ptr = intel->batch->ptr; + intel->prim.start_ptr = intel->batch.used; intel->prim.primitive = prim; intel->prim.flush = intel_flush_inline_primitive; @@ -118,26 +116,25 @@ static void intel_wrap_inline(struct intel_context *intel) GLuint prim = intel->prim.primitive; intel_flush_inline_primitive(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); intel_start_inline(intel, prim); /* ??? 
*/ } static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords) { - GLuint sz = dwords * sizeof(GLuint); GLuint *ptr; assert(intel->prim.flush == intel_flush_inline_primitive); - if (intel_batchbuffer_space(intel->batch) < sz) + if (intel_batchbuffer_space(intel) < dwords * sizeof(GLuint)) intel_wrap_inline(intel); /* printf("."); */ intel->vtbl.assert_not_dirty(intel); - ptr = (GLuint *) intel->batch->ptr; - intel->batch->ptr += sz; + ptr = intel->batch.map + intel->batch.used; + intel->batch.used += dwords; return ptr; } @@ -223,10 +220,10 @@ void intel_flush_prim(struct intel_context *intel) intel->vtbl.emit_state(intel); - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = vb_bo; if (dri_bufmgr_check_aperture_space(aper_array, 2)) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); intel->vtbl.emit_state(intel); } @@ -236,11 +233,6 @@ void intel_flush_prim(struct intel_context *intel) */ intel->no_batch_wrap = GL_TRUE; - /* Check that we actually emitted the state into this batch, using the - * UPLOAD_CTX bit as the signal. - */ - assert((intel->batch->dirty_state & (1<<1)) == 0); - #if 0 printf("emitting %d..%d=%d vertices size %d\n", offset, intel->prim.current_offset, count, -- cgit v1.2.3