diff options
| author | Pauli Nieminen <suokkos@gmail.com> | 2009-08-12 20:28:54 +0300 | 
|---|---|---|
| committer | Pauli Nieminen <suokkos@gmail.com> | 2009-08-21 19:12:30 +0300 | 
| commit | d1a0ece9077b3de49c293a04c220b995424cef28 (patch) | |
| tree | 372264edd89165b6d0af03a96c3698e06e9b1c37 /src/mesa/drivers | |
| parent | 60d156f68864c48e18dfc2691eca5e11c4000fa8 (diff) | |
radeon: Port flush prevention code from r200 to r100.
- emit prediction
- fixes to emit_sizes for data
- clean up of excessive use of radeonEnsureCmdBufferSpace
Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
Diffstat (limited to 'src/mesa/drivers')
| -rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_dma.c | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_ioctl.h | 12 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_state_init.c | 264 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_tcl.c | 72 | 
5 files changed, 245 insertions, 107 deletions
| diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 7e6b74add8..79b2b77d30 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -431,7 +431,7 @@ restart:          if (!rmesa->dma.flush) {  		/* make sure we have enough space to use this in cmdbuf */     		rcommonEnsureCmdBufSpace(rmesa, -			      rmesa->hw.max_state_size + (20*sizeof(int)), +			      radeonCountEmitSize( rmesa ) + (20*sizeof(int)),  			      __FUNCTION__);  		/* if cmdbuf flushed DMA restart */  		if (is_empty_list(&rmesa->dma.reserved)) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h index 18805d4c57..deb53ae313 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h @@ -132,16 +132,18 @@ static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,   * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.   
*/  #if RADEON_OLD_PACKETS -#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) +#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2)  #define VERT_AOS_BUFSZ	(0)  #define ELTS_BUFSZ(nr)	(24 + nr * 2) -#define VBUF_BUFSZ	(6 * sizeof(int)) +#define VBUF_BUFSZ	(8)  #else -#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) -#define VERT_AOS_BUFSZ	(5 * sizeof(int)) +#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) +#define VERT_AOS_BUFSZ	(5)  #define ELTS_BUFSZ(nr)	(16 + nr * 2) -#define VBUF_BUFSZ	(4 * sizeof(int)) +#define VBUF_BUFSZ	(4)  #endif +#define SCISSOR_BUFSZ	(8) +#define INDEX_BUFSZ	(7)  static inline uint32_t cmdpacket3(int cmd_type) diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 57aa7f1ca4..e70dbbe06f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -197,55 +197,84 @@ static int cmdscl( int offset, int stride, int count )     return h.i;  } -#define CHECK( NM, FLAG )				\ +#define CHECK( NM, FLAG, ADD )				\  static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\  {							\ -   return FLAG ? atom->cmd_size : 0;			\ +   return FLAG ? atom->cmd_size + (ADD) : 0;			\  } -#define TCL_CHECK( NM, FLAG )				\ +#define TCL_CHECK( NM, FLAG, ADD )				\  static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\  {							\     r100ContextPtr rmesa = R100_CONTEXT(ctx);	\ -   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\ +   return (!rmesa->radeon.TclFallback && (FLAG)) ? 
atom->cmd_size + (ADD) : 0;	\  } -CHECK( always, GL_TRUE ) -CHECK( never, GL_FALSE ) -CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled ) -CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled ) +CHECK( always, GL_TRUE, 0 ) +CHECK( always_add2, GL_TRUE, 2 ) +CHECK( never, GL_FALSE, 0 ) +CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 ) +CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 )  /* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */ -CHECK( tex2, ctx->Texture._EnabledUnits ) -CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( fog, ctx->Fog.Enabled ) -TCL_CHECK( tcl, GL_TRUE ) -TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled ) -TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled ) -TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled ) -TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) -TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled ) -TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled ) -TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled ) -TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled ) -TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled ) -TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled ) -TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled ) -TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled ) -TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled ) -TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) ) -TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) ) -TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) ) -TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) ) -TCL_CHECK( tcl_ucp4, 
(ctx->Transform.ClipPlanesEnabled & 0x10) ) -TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) ) -TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled )  +CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 ) +CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled, 2 ) +CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 ) +CHECK( tex2, ctx->Texture._EnabledUnits, 2 ) +CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( fog, ctx->Fog.Enabled, 0 ) +CHECK( fog_add4, ctx->Fog.Enabled, 4 ) +TCL_CHECK( tcl, GL_TRUE, 0 ) +TCL_CHECK( tcl_add4, GL_TRUE, 4 ) +TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 ) +TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 ) +TCL_CHECK( tcl_lit2, 
ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 ) +TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 ) +TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 ) +TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 0 ) +TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 ) +TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 ) +TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 ) +TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 ) +TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 ) +TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 ) +TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 ) +TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 ) +TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 ) +TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 ) +TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 ) +TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 ) +TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 ) +TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 ) +TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 ) +TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 ) +TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 ) +TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 ) +TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 ) +TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 ) +TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 ) +TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 ) +TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords 
|| ctx->Fog.Enabled, 0 ) +TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 ) -CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) -CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) -CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) +CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 ) +CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 ) +CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )  #define OUT_VEC(hdr, data) do {			\      drm_radeon_cmd_header_t h;					\ @@ -271,9 +300,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx);     BATCH_LOCALS(&r100->radeon); -   uint32_t dwords = atom->cmd_size; +   uint32_t dwords = atom->check(ctx, atom); -   dwords += 2;     BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_SCL(atom->cmd[0], atom->cmd+1);     END_BATCH(); @@ -284,9 +312,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx);     BATCH_LOCALS(&r100->radeon); -   uint32_t dwords = atom->cmd_size; +   uint32_t dwords = atom->check(ctx, atom); -   dwords += 4;     BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_VEC(atom->cmd[0], atom->cmd+1);     END_BATCH(); @@ -297,9 +324,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx);     BATCH_LOCALS(&r100->radeon); -   uint32_t dwords = atom->cmd_size; +   uint32_t dwords = atom->check(ctx, atom); -   dwords += 6;     BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);     OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); @@ -313,10 +339,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)     struct radeon_renderbuffer *rrb;     uint32_t cbpitch;     uint32_t zbpitch, depth_fmt; -   uint32_t dwords = atom->cmd_size; +   
uint32_t dwords = atom->check(ctx, atom);     /* output the first 7 bytes of context */ -   BEGIN_BATCH_NO_AUTOSTATE(dwords + 4); +   BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_BATCH_TABLE(atom->cmd, 5);     rrb = radeon_get_depthbuffer(&r100->radeon); @@ -371,6 +397,28 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)     END_BATCH();  } +static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom) +{ +   r100ContextPtr r100 = R100_CONTEXT(ctx); +   struct radeon_renderbuffer *rrb, *drb; +   uint32_t dwords; + +   rrb = radeon_get_colorbuffer(&r100->radeon); +   if (!rrb || !rrb->bo) { +      return 0; +   } + +   drb = radeon_get_depthbuffer(&r100->radeon); + +   dwords = 10; +   if (drb) +     dwords += 6; +   if (rrb) +     dwords += 8; + +   return dwords; +} +  static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx); @@ -378,7 +426,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)     struct radeon_renderbuffer *rrb, *drb;     uint32_t cbpitch = 0;     uint32_t zbpitch = 0; -   uint32_t dwords = atom->cmd_size; +   uint32_t dwords = atom->check(ctx, atom);     uint32_t depth_fmt;     rrb = radeon_get_colorbuffer(&r100->radeon); @@ -418,12 +466,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)     } -   /* output the first 7 bytes of context */ -   dwords = 10; -   if (drb) -     dwords += 6; -   if (rrb) -     dwords += 8;     BEGIN_BATCH_NO_AUTOSTATE(dwords);     /* In the CS case we need to split this up */ @@ -474,7 +516,7 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx);     BATCH_LOCALS(&r100->radeon); -   uint32_t dwords = 3; +   uint32_t dwords = atom->check(ctx, atom);     int i = atom->idx, j;     radeonTexObj *t = r100->state.texture.unit[i].texobj;     radeon_mipmap_level *lvl; @@ -488,7 +530,7 @@ static void 
cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)     if (!t->mt)  	return; -   BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 3)); +   BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_BATCH_TABLE(atom->cmd, 3);     lvl = &t->mt->levels[0];     for (j = 0; j < 5; j++) { @@ -502,7 +544,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)  {     r100ContextPtr r100 = R100_CONTEXT(ctx);     BATCH_LOCALS(&r100->radeon); -   uint32_t dwords = 2; +   uint32_t dwords = atom->check(ctx, atom);     int i = atom->idx, j;     radeonTexObj *t = r100->state.texture.unit[i].texobj;     radeon_mipmap_level *lvl; @@ -523,7 +565,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)  	default:  	case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break;     }; -   BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 4)); +   BEGIN_BATCH_NO_AUTOSTATE(dwords);     OUT_BATCH_TABLE(atom->cmd, 2);     lvl = &t->mt->levels[0];     for (j = 0; j < 5; j++) { @@ -666,9 +708,10 @@ void radeonInitState( r100ContextPtr rmesa )     /* Allocate state buffers:      */     ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); -   if (rmesa->radeon.radeonScreen->kernel_mm) +   if (rmesa->radeon.radeonScreen->kernel_mm) {       rmesa->hw.ctx.emit = ctx_emit_cs; -   else +     rmesa->hw.ctx.check = check_always_ctx; +   } else       rmesa->hw.ctx.emit = ctx_emit;     ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );     ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); @@ -678,13 +721,63 @@ void radeonInitState( r100ContextPtr rmesa )     ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );     ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );     ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 ); -   ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 ); -   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); -   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); -   ALLOC_STATE( eye, 
tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); -   ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); -   ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); -   ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); +   if (rmesa->radeon.radeonScreen->kernel_mm) { +      ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 ); +      ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 ); +      ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 ); +      ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); +      ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); +      ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); +      ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); +      ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 ); +      ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 ); +      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); +      ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 ); +      ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 ); +      ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 ); +      ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 ); +      ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 ); +      ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 ); +      ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 ); +      ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 ); +      ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 ); +      ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 ); +      ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 ); + 
     ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); +      ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); +      ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); +      ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); +      ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); +      ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); +   } else { +      ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 ); +      ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); +      ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); +      ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); +      ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); +      ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); +      ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); +      ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); +      ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); +      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); +      ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 ); +      ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 ); +      ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 ); +      ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 ); +      ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 ); +      ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 ); +      ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 ); +      ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 ); +      ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); +      ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, 
"LIT/light-6", 1 ); +      ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); +      ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); +      ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); +      ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); +      ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); +      ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); +      ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); +   }     for (i = 0; i < 3; i++) {        if (rmesa->radeon.radeonScreen->kernel_mm) @@ -694,14 +787,19 @@ void radeonInitState( r100ContextPtr rmesa )     }     if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)     { -      ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); -      ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); -      ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); -      for (i = 0; i < 3; i++) -          if (rmesa->radeon.radeonScreen->kernel_mm) -              rmesa->hw.cube[i].emit = cube_emit_cs; -          else -              rmesa->hw.cube[i].emit = cube_emit; +      if (rmesa->radeon.radeonScreen->kernel_mm) { +         ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); +         ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); +         ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); +         for (i = 0; i < 3; i++) +            rmesa->hw.cube[i].emit = cube_emit_cs; +      } else { +         ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); +         ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); +         ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); +         for (i = 0; i < 3; i++) +            rmesa->hw.cube[i].emit = cube_emit; +      }     }     else     
{ @@ -709,26 +807,6 @@ void radeonInitState( r100ContextPtr rmesa )        ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );        ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );     } -   ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); -   ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); -   ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); -   ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 ); -   ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 ); -   ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 ); -   ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); -   ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); -   ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); -   ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); -   ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); -   ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); -   ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 ); -   ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 ); -   ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 ); -   ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 ); -   ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 ); -   ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); -   ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); -   ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );     ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );     ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );     ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 ); diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c 
b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 58b3be9391..a1b398e560 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -285,7 +285,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)     r100ContextPtr rmesa = R100_CONTEXT(ctx);     rcommonEnsureCmdBufSpace(&rmesa->radeon, -			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)), +			    radeonCountEmitSize( &rmesa->radeon ) + (12*sizeof(int)),  			    __FUNCTION__); diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index df6708f05e..c5a73a2818 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -149,9 +149,6 @@ static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )        if (rmesa->radeon.dma.flush)  	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); -      rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) +  -			       AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); -        radeonEmitAOS( rmesa,  		     rmesa->radeon.tcl.aos_count, 0 ); @@ -176,10 +173,6 @@ static void radeonEmitPrim( GLcontext *ctx,     r100ContextPtr rmesa = R100_CONTEXT( ctx );     radeonTclPrimitive( ctx, prim, hwprim ); -   rcommonEnsureCmdBufSpace( &rmesa->radeon, -			     AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + -			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); -     radeonEmitAOS( rmesa,  		  rmesa->radeon.tcl.aos_count,  		  start ); @@ -196,6 +189,8 @@ static void radeonEmitPrim( GLcontext *ctx,     radeonEmitPrim( ctx, prim, hwprim, start, count );           \     (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 +  /* Try & join small primitives   */  #if 0 @@ -360,6 +355,68 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )     }  } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction 
has to aim towards the best possible value that is worse than worst case scenario + */ +static void radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs ) +{ +  r100ContextPtr rmesa = R100_CONTEXT(ctx); +  TNLcontext *tnl = TNL_CONTEXT(ctx); +  struct vertex_buffer *VB = &tnl->vb; +  GLuint space_required; +  GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */ +  int i; +  /* list of flags that are allocating aos object */ +  const GLuint flags_to_check[] = { +    VERT_BIT_NORMAL, +    VERT_BIT_COLOR0, +    VERT_BIT_COLOR1, +    VERT_BIT_FOG +  }; +  /* predict number of aos to emit */ +  for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) +  { +    if (inputs & flags_to_check[i]) +      ++nr_aos; +  } +  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) +  { +    if (inputs & VERT_BIT_TEX(i)) +      ++nr_aos; +  } + +  { +    /* count the prediction for state size */ +    space_required = radeonCountEmitSize( &rmesa->radeon ); +    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */ +    if (!rmesa->hw.tcl.dirty) +      space_required += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl ); +    /* predict size for elements */ +    for (i = 0; i < VB->PrimitiveCount; ++i) +    { +      if (!VB->Primitive[i].count) +	continue; +      /* If primitive.count is less than MAX_CONVERSION_SIZE +	 rendering code may decide convert to elts. +	 In that case we have to make pessimistic prediction. +	 and use larger of 2 paths. */ +      const GLuint elts = ELTS_BUFSZ(nr_aos); +      const GLuint index = INDEX_BUFSZ; +      const GLuint vbuf = VBUF_BUFSZ; +      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) +	  || vbuf > index + elts) +	space_required += vbuf; +      else +	space_required += index + elts; +      space_required += AOS_BUFSZ(nr_aos); +    } +    space_required += SCISSOR_BUFSZ; +  } +  /* flush the buffer in case we need more than is left. 
*/ +  rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__); +} +  /**********************************************************************/  /*                          Render pipeline stage                     */  /**********************************************************************/ @@ -410,6 +467,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,     }     radeonReleaseArrays( ctx, ~0 ); +   radeonEnsureEmitSize( ctx, inputs );     radeonEmitArrays( ctx, inputs );     rmesa->tcl.Elts = VB->Elts; | 
