diff options
Diffstat (limited to 'src/mesa/drivers/dri/r200/r200_tcl.c')
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_tcl.c | 132 |
1 files changed, 94 insertions, 38 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 99aecfe1e9..0f35d4d5b1 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -51,6 +51,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_swtcl.h" #include "r200_maos.h" +#include "radeon_common_context.h" + #define HAVE_POINTS 1 @@ -109,7 +111,7 @@ static GLboolean discrete_prim[0x10] = { #define ELT_INIT(prim, hw_prim) \ r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND ) -#define GET_MESA_ELTS() rmesa->tcl.Elts +#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts /* Don't really know how many elts will fit in what's left of cmdbuf, @@ -123,7 +125,7 @@ static GLboolean discrete_prim[0x10] = { #define RESET_STIPPLE() do { \ R200_STATECHANGE( rmesa, lin ); \ - r200EmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define AUTO_STIPPLE( mode ) do { \ @@ -134,7 +136,7 @@ static GLboolean discrete_prim[0x10] = { else \ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ ~R200_LINE_PATTERN_AUTO_RESET; \ - r200EmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) @@ -142,26 +144,22 @@ static GLboolean discrete_prim[0x10] = { static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) { - if (rmesa->dma.flush == r200FlushElts && - rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) { + if (rmesa->radeon.dma.flush == r200FlushElts && + rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { - GLushort *dest = (GLushort *)(rmesa->store.cmd_buf + - rmesa->store.cmd_used); + GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr + + rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used); - rmesa->store.cmd_used += nr*2; + rmesa->tcl.elt_used += nr*2; return dest; } else { - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr) ); + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); r200EmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); + rmesa->radeon.tcl.aos_count, 0 ); return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr ); } @@ -188,13 +186,11 @@ static void r200EmitPrim( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT( ctx ); r200TclPrimitive( ctx, prim, hwprim ); - r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); + // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count); r200EmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, - start ); + rmesa->radeon.tcl.aos_count, + start ); /* Why couldn't this packet have taken an offset param? */ @@ -207,6 +203,7 @@ static void r200EmitPrim( GLcontext *ctx, r200EmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 /* Try & join small primitives */ #if 0 @@ -369,6 +366,65 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario + */ +static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 0; + int i; + /* predict number of aos to emit */ + for (i = 0; i < 15; ++i) + { + if (vimap_rev[i] != 255) + { + ++nr_aos; + } + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* vtx may be changed in r200EmitArrays so account for it if not dirty */ + if (!rmesa->hw.vtx.dirty) + state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + } + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s space %u, aos %d\n", __func__, space_required, AOS_BUFSZ(nr_aos) ); + /* flush the buffer in case we need more than is left. */ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; +} + /**********************************************************************/ /* Render pipeline stage */ @@ -394,7 +450,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* TODO: separate this from the swtnl pipeline */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (R200_DEBUG & DEBUG_PRIMS) @@ -405,8 +461,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Validate state: */ - if (rmesa->NewGLState) - r200ValidateState( ctx ); + if (rmesa->radeon.NewGLState) + if (!r200ValidateState( ctx )) + return GL_TRUE; /* fallback to sw t&l */ if (!ctx->VertexProgram._Enabled) { /* NOTE: inputs != tnl->render_inputs - these are the untransformed @@ -481,11 +538,11 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Do the actual work: */ - r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev ) + + rmesa->radeon.cmdbuf.cs->cdw; r200EmitArrays( ctx, vimap_rev ); - rmesa->tcl.Elts = VB->Elts; - for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); @@ -495,11 +552,14 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, if (!length) continue; - if (rmesa->tcl.Elts) + if (VB->Elts) r200EmitEltPrimitive( ctx, start, start+length, prim ); else r200EmitPrimitive( ctx, start, start+length, prim ); } + if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; /* finished the pipe */ } @@ -545,7 +605,7 @@ static void transition_to_swtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = _mesa_validate_all_lighting_tables; - r200ReleaseArrays( ctx, ~0 ); + radeonReleaseArrays( ctx, ~0 ); /* Still using the D3D based hardware-rasterizer from the radeon; * need to put the card into D3D mode to make it work: @@ -565,15 +625,11 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = r200UpdateMaterial; - if ( rmesa->dma.flush ) - rmesa->dma.flush( rmesa ); + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rmesa->dma.flush = NULL; + rmesa->radeon.dma.flush = NULL; - if (rmesa->swtcl.indexed_verts.buf) - r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); - R200_STATECHANGE( rmesa, vap ); rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE; rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE; @@ -631,10 +687,10 @@ static char *getFallbackString(GLuint bit) void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint oldfallback = rmesa->TclFallback; + GLuint oldfallback = rmesa->radeon.TclFallback; if (mode) { - rmesa->TclFallback |= bit; + rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "R200 begin tcl fallback %s\n", @@ -643,7 +699,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->TclFallback &= ~bit; + rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "R200 end tcl fallback %s\n", |