From 5ba92a5b0543b4ff2c7db6101029ba36cb9843fa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 29 Jan 2009 16:47:37 +1000 Subject: radeon/r200/r300: bring back old style DMA buffer on top of BOs. this gets back a lot of the lots speed in gears on r500 at least I also fixed the legacy bufmgr to deal when the dma space fills up --- src/mesa/drivers/dri/r300/r300_context.c | 1 + src/mesa/drivers/dri/r300/r300_context.h | 1 - src/mesa/drivers/dri/r300/r300_emit.c | 52 ++++++++++--------- src/mesa/drivers/dri/r300/r300_ioctl.c | 10 ++-- src/mesa/drivers/dri/r300/r300_render.c | 6 +-- src/mesa/drivers/dri/r300/r300_swtcl.c | 89 ++++++++++++++++++++++---------- 6 files changed, 97 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers/dri/r300') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index eb8e481a18..f57952d878 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -259,6 +259,7 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.update_draw_buffer = r300UpdateDrawBuffer; radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header; radeon->vtbl.emit_state = r300_vtbl_emit_state; + radeon->vtbl.flush_vertices = r300_vtbl_flush_vertices; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 8f67460e42..155529a8a6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -699,7 +699,6 @@ struct r300_swtcl_info { GLuint specoffset; struct radeon_bo *bo; - void (*flush) (r300ContextPtr); }; diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index e2d90a843e..c47f19ea11 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -301,28 +301,28 @@ int r300EmitArrays(GLcontext * ctx) } /* Setup INPUT_ROUTE. */ - if (rmesa->radeon.radeonScreen->kernel_mm) { - R300_STATECHANGE(rmesa, vir[0]); - rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[0].cmd[0] |= - (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; - R300_STATECHANGE(rmesa, vir[1]); - rmesa->hw.vir[1].cmd[0] |= - (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr) & 0x3FFF) << 16; - } else { - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - vb->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); - } - + if (rmesa->radeon.radeonScreen->kernel_mm) { + R300_STATECHANGE(rmesa, vir[0]); + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= + (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; + R300_STATECHANGE(rmesa, vir[1]); + rmesa->hw.vir[1].cmd[0] |= + (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr) & 0x3FFF) << 16; + } else { + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + } + /* Setup INPUT_CNTL. */ R300_STATECHANGE(rmesa, vic); rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); @@ -337,6 +337,8 @@ int r300EmitArrays(GLcontext * ctx) rmesa->state.aos_count = nr; + radeon_bo_unmap(rmesa->radeon.dma.current); + return R300_FALLBACK_NONE; } @@ -347,13 +349,15 @@ void r300ReleaseArrays(GLcontext * ctx) if (rmesa->state.elt_dma_bo) { radeon_bo_unref(rmesa->state.elt_dma_bo); - rmesa->state.elt_dma_bo = 0; + rmesa->state.elt_dma_bo = NULL; } for (i = 0; i < rmesa->state.aos_count; i++) { if (rmesa->state.aos[i].bo) { - rmesa->state.aos[i].bo = radeon_bo_unref(rmesa->state.aos[i].bo); + radeon_bo_unref(rmesa->state.aos[i].bo); + rmesa->state.aos[i].bo = NULL; } } + radeonReleaseDmaRegion(&rmesa->radeon); } void r300EmitCacheFlush(r300ContextPtr rmesa) diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index d12fde175b..b0a579bf84 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -393,7 +393,7 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, fp); R300_STATECHANGE(r300, r500fp); - BEGIN_BATCH(14); + BEGIN_BATCH(7); OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_BATCH(0x0); @@ -619,10 +619,10 @@ void r300Flush(GLcontext * ctx) if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - if (rmesa->swtcl.flush) { - rmesa->swtcl.flush(rmesa); - } - + if (rmesa->radeon.dma.flush) { + rmesa->radeon.dma.flush(ctx); + } + if (rmesa->radeon.cmdbuf.cs->cdw) { rcommonFlushCmdBuf(&rmesa->radeon, __FUNCTION__); } diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index ef3671eadb..57249c46ef 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -177,10 +177,8 @@ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) r300ContextPtr rmesa = R300_CONTEXT(ctx); void *out; - rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, n_elts * 4, 4, - RADEON_GEM_DOMAIN_GTT, 0); - rmesa->state.elt_dma_offset = 0; + radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo, + &rmesa->state.elt_dma_offset, n_elts * 4, 4); radeon_bo_map(rmesa->state.elt_dma_bo, 1); out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset; memcpy(out, elts, n_elts * 4); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index eb86bd3bdd..1ce51b21f3 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -57,7 +57,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_emit.h" -static void flush_last_swtcl_prim( r300ContextPtr rmesa ); +static void flush_last_swtcl_prim( GLcontext *ctx); void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset); @@ -241,26 +241,45 @@ static void r300SetVertexFormat( GLcontext *ctx ) /* Flush vertices in the current dma region. */ -static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +static void flush_last_swtcl_prim( GLcontext *ctx ) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_dma *dma = &rmesa->radeon.dma; + + if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - rmesa->swtcl.flush = NULL; - radeon_bo_unmap(rmesa->swtcl.bo); - rcommonEnsureCmdBufSpace(rmesa, - rmesa->hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); - r300EmitState(rmesa); - r300EmitVertexAOS(rmesa, - rmesa->swtcl.vertex_size, - rmesa->swtcl.bo, - 0); - r300EmitVbufPrim(rmesa, - rmesa->swtcl.hw_primitive, - rmesa->swtcl.numverts); - r300EmitCacheFlush(rmesa); - COMMIT_BATCH(); - rmesa->swtcl.numverts = 0; + dma->flush = NULL; + + if (dma->current) { + GLuint current_offset = dma->current_used; + + assert (dma->current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + dma->current_vertexptr); + + radeon_bo_unmap(dma->current); + if (dma->current_used != dma->current_vertexptr) { + dma->current_used = dma->current_vertexptr; + + rcommonEnsureCmdBufSpace(rmesa, + rmesa->hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); + r300EmitState(rmesa); + r300EmitVertexAOS(rmesa, + rmesa->swtcl.vertex_size, + dma->current, + current_offset); + + r300EmitVbufPrim(rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + r300EmitCacheFlush(rmesa); + COMMIT_BATCH(); + } + radeonReleaseDmaRegion(&rmesa->radeon); + rmesa->swtcl.numverts = 0; + } } /* Alloc space in the current dma region. @@ -269,15 +288,29 @@ static void * r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) { GLuint bytes = vsize * nverts; + void *head; - rmesa->swtcl.bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, bytes, 4, RADEON_GEM_DOMAIN_GTT, 0); - radeon_bo_map(rmesa->swtcl.bo, 1); - if (rmesa->swtcl.flush == NULL) { - rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->swtcl.flush = flush_last_swtcl_prim; + if (!rmesa->radeon.dma.current || rmesa->radeon.dma.current_vertexptr + bytes > rmesa->radeon.dma.current->size) { + radeonRefillCurrentDmaRegion( &rmesa->radeon, bytes); } - return rmesa->swtcl.bo->ptr; + + if (!rmesa->radeon.dma.flush) { + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->radeon.dma.flush = flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->radeon.dma.flush == flush_last_swtcl_prim ); + ASSERT( rmesa->radeon.dma.current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current_vertexptr ); + +// fprintf(stderr,"current %p %x\n", rmesa->radeon.dma.current->ptr, +// rmesa->radeon.dma.current_vertexptr); + head = (rmesa->radeon.dma.current->ptr + rmesa->radeon.dma.current_vertexptr); + rmesa->radeon.dma.current_vertexptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; } static GLuint reduced_prim[] = { @@ -550,9 +583,9 @@ static void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); - if (rmesa->swtcl.flush != NULL) { - rmesa->swtcl.flush(rmesa); - } + if (rmesa->radeon.dma.flush != NULL) { + rmesa->radeon.dma.flush(ctx); + } } static void r300RenderFinish(GLcontext *ctx) -- cgit v1.2.3