From e476e122207e6195a16a8c7d2cab90eeba227934 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Feb 2011 20:01:10 +0000 Subject: intel: Pack dynamic draws together Dynamic arrays have the tendency to be small and so allocating a bo for each one is overkill and we can exploit many efficiency gains by packing them together. Signed-off-by: Chris Wilson --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 16 +++--- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 6 +++ src/mesa/drivers/dri/intel/intel_buffer_objects.c | 62 ++++++++++++++++++++--- src/mesa/drivers/dri/intel/intel_buffer_objects.h | 6 +++ src/mesa/drivers/dri/intel/intel_context.h | 5 ++ src/mesa/drivers/dri/intel/intel_pixel_read.c | 7 +-- src/mesa/drivers/dri/intel/intel_tex_image.c | 6 ++- 7 files changed, 88 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 2cd249812f..7f93f1d5ed 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -359,13 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) if (_mesa_is_bufferobj(input->glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(input->glarray->BufferObj); + GLuint offset; /* Named buffer object: Just reference its contents directly. 
*/ drm_intel_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); + input->bo = intel_bufferobj_source(intel, intel_buffer, &offset); drm_intel_bo_reference(input->bo); - input->offset = (unsigned long)input->glarray->Ptr; + input->offset = offset + (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; input->count = input->glarray->_MaxElement; @@ -633,16 +633,16 @@ static void brw_prepare_indices(struct brw_context *brw) ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); } else { - bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), - INTEL_READ); - drm_intel_bo_reference(bo); - /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index * of our drawing. */ brw->ib.start_vertex_offset = offset / ib_type_size; - offset = 0; + + bo = intel_bufferobj_source(intel, intel_buffer_object(bufferobj), + &offset); + drm_intel_bo_reference(bo); + ib_size = bo->size; } } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 67ce8a4da0..28bf42e770 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -175,6 +175,12 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (intel->vtbl.finish_batch) intel->vtbl.finish_batch(intel); + if (intel->upload.bo) { + drm_intel_bo_unreference(intel->upload.bo); + intel->upload.bo = NULL; + intel->upload.offset = 0; + } + /* Check that we didn't just wrap our batchbuffer at a bad time. 
*/ assert(!intel->no_batch_wrap); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 62e8d822c2..f54fc1a12b 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -528,7 +528,8 @@ intel_bufferobj_unmap(struct gl_context * ctx, drm_intel_bo * intel_bufferobj_buffer(struct intel_context *intel, - struct intel_buffer_object *intel_obj, GLuint flag) + struct intel_buffer_object *intel_obj, + GLuint flag) { if (intel_obj->region) { if (flag == INTEL_WRITE_PART) @@ -539,19 +540,65 @@ intel_bufferobj_buffer(struct intel_context *intel, } } + if (intel_obj->source) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + intel_obj->source = 0; + } + if (intel_obj->buffer == NULL) { - /* XXX suballocate for DYNAMIC READ */ intel_bufferobj_alloc_buffer(intel, intel_obj); drm_intel_bo_subdata(intel_obj->buffer, 0, intel_obj->Base.Size, intel_obj->sys_buffer); - if (flag != INTEL_READ) { - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + intel_obj->offset = 0; + } + + return intel_obj->buffer; +} + +#define INTEL_UPLOAD_SIZE (64*1024) + +static void wrap_buffers(struct intel_context *intel, GLuint size) +{ + if (size < INTEL_UPLOAD_SIZE) + size = INTEL_UPLOAD_SIZE; + + if (intel->upload.bo != NULL) + drm_intel_bo_unreference(intel->upload.bo); + + intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); + intel->upload.offset = 0; +} + +drm_intel_bo * +intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint *offset) +{ + if (intel_obj->buffer == NULL) { + GLuint size = ALIGN(intel_obj->Base.Size, 64); + + if (intel->upload.bo == NULL || + intel->upload.offset + size > intel->upload.bo->size) { + wrap_buffers(intel, size); } + + drm_intel_bo_reference(intel->upload.bo); + 
intel_obj->buffer = intel->upload.bo; + intel_obj->offset = intel->upload.offset; + intel_obj->source = 1; + intel->upload.offset += size; + + drm_intel_bo_subdata(intel_obj->buffer, + intel_obj->offset, intel_obj->Base.Size, + intel_obj->sys_buffer); } + *offset = intel_obj->offset; return intel_obj->buffer; } @@ -566,6 +613,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, struct intel_buffer_object *intel_src = intel_buffer_object(src); struct intel_buffer_object *intel_dst = intel_buffer_object(dst); drm_intel_bo *src_bo, *dst_bo; + GLuint src_offset; if (size == 0) return; @@ -600,11 +648,11 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, /* Otherwise, we have real BOs, so blit them. */ dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); - src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ); + src_bo = intel_bufferobj_source(intel, intel_src, &src_offset); intel_emit_linear_blit(intel, dst_bo, write_offset, - src_bo, read_offset, size); + src_bo, read_offset + src_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index b15c192106..c845b70b52 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -42,6 +42,8 @@ struct intel_buffer_object { struct gl_buffer_object Base; drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */ + GLuint offset; /* any offset into that buffer */ + /** System memory buffer data, if not using a BO to store the data. 
*/ void *sys_buffer; @@ -55,6 +57,7 @@ struct intel_buffer_object GLsizei range_map_size; GLboolean mapped_gtt; + GLboolean source; }; @@ -63,6 +66,9 @@ struct intel_buffer_object drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel, struct intel_buffer_object *obj, GLuint flag); +drm_intel_bo *intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint *offset); /* Hook the bufferobject implementation into mesa: */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 134e07ea6a..a1ed462e54 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -187,6 +187,11 @@ struct intel_context unsigned int count; /**< Number of vertices in current primitive */ } prim; + struct { + drm_intel_bo *bo; + GLuint offset; + } upload; + GLuint stats_wm; /* Offsets of fields within the current vertex: diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 54da29236d..716b9cea40 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -78,6 +78,7 @@ do_blit_readpixels(struct gl_context * ctx, GLuint dst_offset; GLuint rowLength; drm_intel_bo *dst_buffer; + GLuint offset = 0; /* intel_bufferobj_buffer() returns no sub-offset to store here */ GLboolean all; GLint dst_x, dst_y; @@ -138,8 +139,8 @@ do_blit_readpixels(struct gl_context * ctx, dst_y = 0; dst_buffer = intel_bufferobj_buffer(intel, dst, - all ? INTEL_WRITE_FULL : - INTEL_WRITE_PART); + all ? 
INTEL_WRITE_FULL : + INTEL_WRITE_PART); if (ctx->ReadBuffer->Name == 0) y = ctx->ReadBuffer->Height - (y + height); @@ -147,7 +148,7 @@ do_blit_readpixels(struct gl_context * ctx, if (!intelEmitCopyBlit(intel, src->cpp, src->pitch, src->buffer, 0, src->tiling, - rowLength, dst_buffer, dst_offset, GL_FALSE, + rowLength, dst_buffer, dst_offset + offset, GL_FALSE, x, y, dst_x, dst_y, width, height, diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index cd8c4c22e5..ddcb748c82 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -235,11 +235,13 @@ try_pbo_upload(struct intel_context *intel, intel_flush(&intel->ctx); { - drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); + GLuint offset; + drm_intel_bo *src_buffer = intel_bufferobj_source(intel, pbo, &offset); if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, - src_stride, src_buffer, src_offset, GL_FALSE, + src_stride, src_buffer, + src_offset + offset, GL_FALSE, dst_stride, dst_buffer, 0, intelImage->mt->region->tiling, 0, 0, dst_x, dst_y, width, height, -- cgit v1.2.3