summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/radeon/radeon_tcl.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/radeon/radeon_tcl.c')
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tcl.c148
1 files changed, 108 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 779e9ae5df..b334ea05e5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
+#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
+#include "radeon_common_context.h"
@@ -104,7 +106,7 @@ static GLboolean discrete_prim[0x10] = {
};
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define ELT_TYPE GLushort
#define ELT_INIT(prim, hw_prim) \
@@ -125,7 +127,7 @@ static GLboolean discrete_prim[0x10] = {
#define RESET_STIPPLE() do { \
RADEON_STATECHANGE( rmesa, lin ); \
- radeonEmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
#define AUTO_STIPPLE( mode ) do { \
@@ -136,31 +138,26 @@ static GLboolean discrete_prim[0x10] = {
else \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
~RADEON_LINE_PATTERN_AUTO_RESET; \
- radeonEmitState( rmesa ); \
+ radeonEmitState(&rmesa->radeon); \
} while (0)
#define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr )
-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa );
+ if (rmesa->radeon.dma.flush)
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
+ radeonEmitAOS( rmesa,
+ rmesa->radeon.tcl.aos_count, 0 );
- radeonEmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components, 0 );
-
- return radeonAllocEltsOpenEnded( rmesa,
- rmesa->tcl.vertex_format,
- rmesa->tcl.hw_primitive, nr );
+ return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
+ rmesa->tcl.hw_primitive, nr );
}
-#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa )
+#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa )
@@ -174,15 +171,11 @@ static void radeonEmitPrim( GLcontext *ctx,
GLuint start,
GLuint count)
{
- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+ r100ContextPtr rmesa = R100_CONTEXT( ctx );
radeonTclPrimitive( ctx, prim, hwprim );
- radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
-
radeonEmitAOS( rmesa,
- rmesa->tcl.aos_components,
- rmesa->tcl.nr_aos_components,
+ rmesa->radeon.tcl.aos_count,
start );
/* Why couldn't this packet have taken an offset param?
@@ -197,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx,
radeonEmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
+
/* Try & join small primitives
*/
#if 0
@@ -254,7 +249,7 @@ void radeonTclPrimitive( GLcontext *ctx,
GLenum prim,
int hw_prim )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
@@ -361,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
}
}
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+ GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
+ int i;
+ /* list of flags that are allocating aos object */
+ const GLuint flags_to_check[] = {
+ VERT_BIT_NORMAL,
+ VERT_BIT_COLOR0,
+ VERT_BIT_COLOR1,
+ VERT_BIT_FOG
+ };
+ /* predict number of aos to emit */
+ for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+ {
+ if (inputs & flags_to_check[i])
+ ++nr_aos;
+ }
+ for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+ {
+ if (inputs & VERT_BIT_TEX(i))
+ ++nr_aos;
+ }
+
+ {
+ /* count the prediction for state size */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+ if (!rmesa->hw.tcl.dirty)
+ state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE
+ rendering code may decide convert to elts.
+ In that case we have to make pessimistic prediction.
+ and use larger of 2 paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+ /* flush the buffer in case we need more than is left. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
+}
+
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
@@ -371,7 +433,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
static GLboolean radeon_run_tcl_render( GLcontext *ctx,
struct tnl_pipeline_stage *stage )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
@@ -379,7 +441,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
/* TODO: separate this from the swtnl pipeline
*/
- if (rmesa->TclFallback)
+ if (rmesa->radeon.TclFallback)
return GL_TRUE; /* fallback to software t&l */
if (VB->Count == 0)
@@ -411,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
}
radeonReleaseArrays( ctx, ~0 );
+ GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+ + rmesa->radeon.cmdbuf.cs->cdw;
radeonEmitArrays( ctx, inputs );
rmesa->tcl.Elts = VB->Elts;
@@ -430,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
radeonEmitPrimitive( ctx, start, start+length, prim );
}
+ if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_FALSE; /* finished the pipe */
}
@@ -461,7 +529,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
static void transition_to_swtnl( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_cntl;
@@ -490,7 +558,7 @@ static void transition_to_swtnl( GLcontext *ctx )
static void transition_to_hwtnl( GLcontext *ctx )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
@@ -509,17 +577,17 @@ static void transition_to_hwtnl( GLcontext *ctx )
tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
- if ( rmesa->dma.flush )
- rmesa->dma.flush( rmesa );
+ if ( rmesa->radeon.dma.flush )
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rmesa->dma.flush = NULL;
+ rmesa->radeon.dma.flush = NULL;
rmesa->swtcl.vertex_format = 0;
- if (rmesa->swtcl.indexed_verts.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
+ // if (rmesa->swtcl.indexed_verts.buf)
+ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+ // __FUNCTION__ );
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback\n");
}
@@ -550,22 +618,22 @@ static char *getFallbackString(GLuint bit)
void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- GLuint oldfallback = rmesa->TclFallback;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint oldfallback = rmesa->radeon.TclFallback;
if (mode) {
- rmesa->TclFallback |= bit;
+ rmesa->radeon.TclFallback |= bit;
if (oldfallback == 0) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon begin tcl fallback %s\n",
getFallbackString( bit ));
transition_to_swtnl( ctx );
}
}
else {
- rmesa->TclFallback &= ~bit;
+ rmesa->radeon.TclFallback &= ~bit;
if (oldfallback == bit) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback %s\n",
getFallbackString( bit ));
transition_to_hwtnl( ctx );