From 9e018d822523e559fa8d92c3b5a83dd5554a0676 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 14 Aug 2009 16:59:26 +0200 Subject: r300: rework index buffer setup Copy elements directly to DMA bo to get rid of one memcpy, and prepare for using VBOs for index buffer. --- src/mesa/drivers/dri/r300/r300_context.h | 5 +- src/mesa/drivers/dri/r300/r300_draw.c | 155 ++++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_render.c | 85 +++++++---------- 3 files changed, 126 insertions(+), 119 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 09de898748..d620417422 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -498,9 +498,10 @@ struct r300_vertex_buffer { }; struct r300_index_buffer { - GLvoid *ptr; + struct radeon_bo *bo; + int bo_offset; + GLboolean is_32bit; - GLboolean free_needed; GLuint count; }; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 99c73d27a2..1d6e6db773 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -47,32 +47,53 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; - GLboolean mapped_bo = GL_FALSE; + GLuint *out; + int i; - if 
(!mesa_ind_buf) { - ind_buf->ptr = NULL; - return; - } - - ind_buf->count = mesa_ind_buf->count; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); - mapped_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); - int i; - ind_buf->ptr = out; + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -82,16 +103,15 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } - ind_buf->free_needed = GL_TRUE; - ind_buf->is_32bit = GL_FALSE; - } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { #if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * - ((mesa_ind_buf->count + 1) & ~1)); - int i; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); - ind_buf->ptr = out; + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -100,46 +120,52 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } - - ind_buf->free_needed = GL_TRUE; -#else - ind_buf->ptr 
= src_ptr; - ind_buf->free_needed = GL_FALSE; #endif - ind_buf->is_32bit = GL_FALSE; - } else { - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; - ind_buf->is_32bit = GL_TRUE; } - if (mapped_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); - } + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; } -static int getTypeSize(GLenum type) + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { - switch (type) { - case GL_DOUBLE: - return sizeof(GLdouble); - case GL_FLOAT: - return sizeof(GLfloat); - case GL_INT: - return sizeof(GLint); - case GL_UNSIGNED_INT: - return sizeof(GLuint); - case GL_SHORT: - return sizeof(GLshort); - case GL_UNSIGNED_SHORT: - return sizeof(GLushort); - case GL_BYTE: - return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: - return sizeof(GLubyte); - default: - assert(0); - return 0; + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLboolean mapped_named_bo = GL_FALSE; + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + _mesa_memcpy(dst_ptr, src_ptr, size); + + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = 
mesa_ind_buf->count; + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); } } @@ -473,13 +499,22 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar RADEON_GEM_DOMAIN_GTT, 0); } } - r300->radeon.tcl.aos_count = vbuf->num_attribs; + + if (r300->ind_buf.bo) { + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + r300->ind_buf.bo, + RADEON_GEM_DOMAIN_GTT, 0); + } } } static void r300FreeData(GLcontext *ctx) { + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ r300ContextPtr r300 = R300_CONTEXT(ctx); { int i; @@ -493,15 +528,9 @@ static void r300FreeData(GLcontext *ctx) } { - struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) { - _mesa_free(ind_buf->ptr); - } - - if (r300->radeon.tcl.elt_dma_bo) { - radeon_bo_unref(r300->radeon.tcl.elt_dma_bo); + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); } - r300->radeon.tcl.elt_dma_bo = NULL; } } @@ -526,7 +555,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib); + r300SetupIndexBuffer(ctx, ib); /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 22b0d316cf..196cb47fef 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -172,64 +172,42 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - void *out; - GLuint size; - - size = ((rmesa->ind_buf.is_32bit ? 
4 : 2) * n_elts + 3) & ~3; - - radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, size, 4); - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); - out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, rmesa->ind_buf.ptr, size); - radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); -} - static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); + int size; - r300_emit_scissor(rmesa->radeon.glCtx); - if (vertex_count > 0) { - int size; - - BEGIN_BATCH(10); - OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (rmesa->ind_buf.is_32bit) { - size = vertex_count; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - size = (vertex_count + 1) >> 1; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type); - } + r300_emit_scissor(rmesa->radeon.glCtx); - if (!rmesa->radeon.radeonScreen->kernel_mm) { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, - rmesa->radeon.tcl.elt_dma_bo, - rmesa->radeon.tcl.elt_dma_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(size); - } else { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(size); - radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, - rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_BATCH(); + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + 
} else { + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); } + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -365,8 +343,7 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) */ rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); - if (rmesa->ind_buf.ptr) { - r300EmitElts(ctx, num_verts); + if (rmesa->ind_buf.bo) { r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); -- cgit v1.2.3