diff options
Diffstat (limited to 'src/mesa/drivers/dri/radeon/radeon_tcl.c')
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_tcl.c | 142 |
1 files changed, 105 insertions, 37 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 779e9ae5df..2404f28450 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "radeon_common.h" #include "radeon_context.h" #include "radeon_state.h" #include "radeon_ioctl.h" @@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_tcl.h" #include "radeon_swtcl.h" #include "radeon_maos.h" +#include "radeon_common_context.h" @@ -104,7 +106,7 @@ static GLboolean discrete_prim[0x10] = { }; -#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define ELT_TYPE GLushort #define ELT_INIT(prim, hw_prim) \ @@ -125,7 +127,7 @@ static GLboolean discrete_prim[0x10] = { #define RESET_STIPPLE() do { \ RADEON_STATECHANGE( rmesa, lin ); \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define AUTO_STIPPLE( mode ) do { \ @@ -136,31 +138,26 @@ static GLboolean discrete_prim[0x10] = { else \ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ ~RADEON_LINE_PATTERN_AUTO_RESET; \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr ) -static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) +static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) { - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); + radeonEmitAOS( rmesa, + rmesa->radeon.tcl.aos_count, 0 ); - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); - - return radeonAllocEltsOpenEnded( rmesa, - rmesa->tcl.vertex_format, - rmesa->tcl.hw_primitive, nr ); + return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, nr ); } -#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) +#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa ) @@ -174,15 +171,11 @@ static void radeonEmitPrim( GLcontext *ctx, GLuint start, GLuint count) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonTclPrimitive( ctx, prim, hwprim ); - radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, + rmesa->radeon.tcl.aos_count, start ); /* Why couldn't this packet have taken an offset param? @@ -197,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx, radeonEmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 + /* Try & join small primitives */ #if 0 @@ -254,7 +249,7 @@ void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; @@ -361,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario + */ +static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs ) +{ + r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */ + int i; + /* list of flags that are allocating aos object */ + const GLuint flags_to_check[] = { + VERT_BIT_NORMAL, + VERT_BIT_COLOR0, + VERT_BIT_COLOR1, + VERT_BIT_FOG + }; + /* predict number of aos to emit */ + for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) + { + if (inputs & flags_to_check[i]) + ++nr_aos; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) + { + if (inputs & VERT_BIT_TEX(i)) + ++nr_aos; + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* tcl may be changed in radeonEmitArrays so account for it if not dirty */ + if (!rmesa->hw.tcl.dirty) + state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl ); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. */ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; +} + /**********************************************************************/ /* Render pipeline stage */ /**********************************************************************/ @@ -371,7 +433,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) static GLboolean radeon_run_tcl_render( GLcontext *ctx, struct tnl_pipeline_stage *stage ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; @@ -379,7 +441,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, /* TODO: separate this from the swtnl pipeline */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (VB->Count == 0) @@ -411,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } radeonReleaseArrays( ctx, ~0 ); + GLuint emit_end = radeonEnsureEmitSize( ctx, inputs ) + + rmesa->radeon.cmdbuf.cs->cdw; radeonEmitArrays( ctx, inputs ); rmesa->tcl.Elts = VB->Elts; @@ -430,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, radeonEmitPrimitive( ctx, start, start+length, prim ); } + if (emit_end < rmesa->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); + return GL_FALSE; /* finished the pipe */ } @@ -461,7 +529,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage = static void transition_to_swtnl( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_cntl; @@ -490,7 +558,7 @@ static void transition_to_swtnl( GLcontext *ctx ) static void transition_to_hwtnl( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; @@ -509,15 +577,15 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; - if ( rmesa->dma.flush ) - rmesa->dma.flush( rmesa ); + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rmesa->dma.flush = NULL; + rmesa->radeon.dma.flush = NULL; rmesa->swtcl.vertex_format = 0; - if (rmesa->swtcl.indexed_verts.buf) - radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); + // if (rmesa->swtcl.indexed_verts.buf) + // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + // __FUNCTION__ ); if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); @@ -550,11 +618,11 @@ static char *getFallbackString(GLuint bit) void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint oldfallback = rmesa->TclFallback; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint oldfallback = rmesa->radeon.TclFallback; if (mode) { - rmesa->TclFallback |= bit; + rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon begin tcl fallback %s\n", @@ -563,7 +631,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->TclFallback &= ~bit; + rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", |