From 570d4e375a327787441c2c7c4ae698e8993a5d6b Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Thu, 27 Aug 2009 14:21:13 +0300 Subject: radeon/r200/r300: Fix swtcl prediction to work after primitive change. Swtcl calls flush every time primitive changes so prediction has to be made again after flushing. --- src/mesa/drivers/dri/r200/r200_swtcl.c | 37 +++++++++++------ src/mesa/drivers/dri/r300/r300_swtcl.c | 47 ++++++++++++++-------- .../drivers/dri/radeon/radeon_common_context.h | 2 +- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 41 +++++++++++++------ 4 files changed, 86 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 6b7279e8db..3d4e701155 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -201,27 +201,35 @@ static void r200SetVertexFormat( GLcontext *ctx ) } } - -static void r200RenderStart( GLcontext *ctx ) +static void r200_predict_emit_size( GLcontext *ctx ) { + r200ContextPtr rmesa = R200_CONTEXT( ctx ); const int vertex_array_size = 7; const int prim_size = 3; - r200ContextPtr rmesa = R200_CONTEXT( ctx ); - r200SetVertexFormat( ctx ); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s\n", __func__); - if (!rmesa->radeon.swtcl.primitive_counter) { + if (!rmesa->radeon.swtcl.emit_prediction) { + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); if (rcommonEnsureCmdBufSpace(&rmesa->radeon, - radeonCountStateEmitSize(&rmesa->radeon) + + state_size + vertex_array_size + prim_size, __FUNCTION__)) - rmesa->radeon.swtcl.primitive_counter = 0; + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); else - rmesa->radeon.swtcl.primitive_counter = 1; + rmesa->radeon.swtcl.emit_prediction = state_size; + rmesa->radeon.swtcl.emit_prediction += vertex_array_size + prim_size + + rmesa->radeon.cmdbuf.cs->cdw; } } +static void r200RenderStart( GLcontext *ctx ) +{ + 
r200SetVertexFormat( ctx ); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s\n", __func__); + r200_predict_emit_size( ctx ); +} + + /** * Set vertex state for SW TCL. The primary purpose of this function is to * determine in advance whether or not the hardware can / should do the @@ -296,8 +304,13 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) r200EmitVbufPrim( rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); - rmesa->radeon.swtcl.primitive_counter = 0; + rmesa->radeon.swtcl.emit_prediction = 0; + r200_predict_emit_size( ctx ); } @@ -905,7 +918,7 @@ void r200InitSwtcl( GLcontext *ctx ) init_rast_tab(); firsttime = 0; } - rmesa->radeon.swtcl.primitive_counter = 0; + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r200RenderStart; tnl->Driver.Render.Finish = r200RenderFinish; diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 18af51e666..ca17f306d1 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -490,6 +490,27 @@ static void r300ChooseRenderState( GLcontext *ctx ) rmesa->radeon.swtcl.RenderIndex = index; } } +static void r300_predict_emit_size( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + + + vertex_size + prim_size, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = 
state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + prim_size + cache_flush_size * 2; + } +} void r300RenderStart(GLcontext *ctx) @@ -508,20 +529,7 @@ void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); - const int vertex_size = 7; - const int prim_size = 3; - - if (!rmesa->radeon.swtcl.primitive_counter) { - if (rcommonEnsureCmdBufSpace(&rmesa->radeon, - radeonCountStateEmitSize(&rmesa->radeon) + - + vertex_size + prim_size, - __FUNCTION__)) - rmesa->radeon.swtcl.primitive_counter = 0; - else - rmesa->radeon.swtcl.primitive_counter = 1; - } - - r300EmitCacheFlush(rmesa); + r300_predict_emit_size( ctx ); /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { @@ -577,7 +585,7 @@ void r300InitSwtcl(GLcontext *ctx) init_rast_tab(); firsttime = 0; } - rmesa->radeon.swtcl.primitive_counter = 0; + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; @@ -644,6 +652,8 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) fprintf(stderr, "%s\n", __func__); r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300EmitCacheFlush(rmesa); + radeonEmitState(&rmesa->radeon); r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, @@ -655,6 +665,11 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); r300EmitCacheFlush(rmesa); - rmesa->radeon.swtcl.primitive_counter = 0; + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + rmesa->radeon.swtcl.emit_prediction = 0; + r300_predict_emit_size( ctx ); COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 07a0c4c055..427eb946ff 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -342,7 +342,7 @@ struct radeon_swtcl_info { struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; GLuint vertex_attr_count; - GLuint primitive_counter; + GLuint emit_prediction; }; #define RADEON_MAX_AOS_ARRAYS 16 diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index c9e399f9b6..7358e22255 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -220,6 +220,27 @@ static void radeonSetVertexFormat( GLcontext *ctx ) } } +static void radeon_predict_emit_size( GLcontext* ctx ) +{ + r100ContextPtr rmesa = R100_CONTEXT( ctx ); + + if (!rmesa->radeon.swtcl.emit_prediction) { + const int state_size = radeonCountStateEmitSize( &rmesa->radeon ); + const int scissor_size = 8; + const int prims_size = 8; + const int vertex_size = 7; + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + + (scissor_size + prims_size + vertex_size), + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon ); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size + + rmesa->radeon.cmdbuf.cs->cdw; + } +} static void radeonRenderStart( GLcontext *ctx ) { @@ -230,16 +251,7 @@ static void radeonRenderStart( GLcontext *ctx ) if (rmesa->radeon.dma.flush != 0 && rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) rmesa->radeon.dma.flush( ctx ); - - if (!rmesa->radeon.swtcl.primitive_counter) { 
- if (rcommonEnsureCmdBufSpace(&rmesa->radeon, - radeonCountStateEmitSize( &rmesa->radeon ) + - (8 + 8 + 7), /* scissor + primis + VertexAOS */ - __FUNCTION__)) - rmesa->radeon.swtcl.primitive_counter = 0; - else - rmesa->radeon.swtcl.primitive_counter = 1; - } + radeon_predict_emit_size( ctx ); } @@ -307,9 +319,14 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->swtcl.vertex_format, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + radeon_predict_emit_size( ctx ); - rmesa->radeon.swtcl.primitive_counter = 0; + rmesa->radeon.swtcl.emit_prediction = 0; } @@ -814,8 +831,8 @@ void radeonInitSwtcl( GLcontext *ctx ) if (firsttime) { init_rast_tab(); firsttime = 0; - rmesa->radeon.swtcl.primitive_counter = 0; } + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = radeonRenderStart; tnl->Driver.Render.Finish = radeonRenderFinish; -- cgit v1.2.3