r200: Prevent flush in middle of rendering.

Patch adds a prediction function that tries to predict the emit size as the smallest possible value that is guaranteed to be higher than the worst-case scenario in the rendering pipeline. The state emit size prediction code is in place, but the fix for the emit sizes is included in the next patch. Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
author: Pauli Nieminen <suokkos@gmail.com> 2009-08-11 23:43:35 +0300
committer: Pauli Nieminen <suokkos@gmail.com> 2009-08-21 19:12:29 +0300
commit: fb1d0bfd47fb8790e0b350a0fad7bc0af39e70f4 (patch)
tree: c30d9e7c9860338a1aca6fbddd57ceed2bb101b7
parent: 7f8f486b36dc42a3818546c704321320fcdbc94b (diff)
4 files changed, 83 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
index 2a4b8a11f4..f6419f5a2c 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.h
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
@@ -125,10 +125,12 @@ static INLINE int R200_DB_STATECHANGE(
  * are available, you will also be adding an rmesa->state.max_state_size because
  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
  */
-#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ	(5 * sizeof(int))
+#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) /* no longer scaled by sizeof(int); NOTE(review): nr is unparenthesized, pass simple expressions only */
+#define VERT_AOS_BUFSZ	(5) /* no longer scaled by sizeof(int); presumably dwords - confirm */
 #define ELTS_BUFSZ(nr)	(12 + nr * 2)
-#define VBUF_BUFSZ	(3 * sizeof(int))
+#define VBUF_BUFSZ	(3) /* no longer scaled by sizeof(int); added to the dword count in r200EnsureEmitSize */
+#define SCISSOR_BUFSZ	(8) /* added once per r200EnsureEmitSize call on top of the per-primitive sizes */
+#define INDEX_BUFSZ	(8+2) /* added together with ELTS_BUFSZ for the elts path in r200EnsureEmitSize */
 
 static inline uint32_t cmdpacket3(int cmd_type)
 {
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index ca9a8dbf8c..455a4bbd6b 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -206,6 +206,7 @@ static void r200EmitPrim( GLcontext *ctx,
    r200EmitPrim( ctx, prim, hwprim, start, count );             \
    (void) rmesa; } while (0)
 
+#define MAX_CONVERSION_SIZE 40 /* r200EnsureEmitSize assumes primitives with a count below this may be converted to elts */
 /* Try & join small primitives
  */
 #if 0
@@ -368,6 +369,58 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
    }
 }
 
+/**
+ * Predict the total emit size of the next rendering operation and request that much command buffer space
+ * up front, so no flush happens in the middle of rendering. Aim for the smallest value that still covers the worst case.
+ */
+static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
+{
+  r200ContextPtr rmesa = R200_CONTEXT(ctx);
+  TNLcontext *tnl = TNL_CONTEXT(ctx);
+  struct vertex_buffer *VB = &tnl->vb;
+  GLuint space_required;
+  GLuint nr_aos = 0;
+  int i;
+  /* predict the number of aos to emit: count the first 15 vimap_rev entries that are not 255 */
+  for (i = 0; i < 15; ++i)
+  {
+    if (vimap_rev[i] != 255)
+    {
+      ++nr_aos;
+    }
+  }
+
+  {
+    /* start from the predicted size of the pending state emit */
+    space_required = radeonCountEmitSize( &rmesa->radeon );
+    /* r200EmitArrays may still change vtx, so add its size when it is not already dirty */
+    if (!rmesa->hw.vtx.dirty)
+      space_required += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+    /* add the predicted size of every primitive, skipping those with a zero count */
+    for (i = 0; i < VB->PrimitiveCount; ++i)
+    {
+      if (!VB->Primitive[i].count)
+	continue;
+      /* If primitive.count is less than MAX_CONVERSION_SIZE the rendering
+         code may decide to convert to elts, so be pessimistic and use the
+	 larger of the two paths.
+	 NOTE(review): ELTS_BUFSZ is given nr_aos here; confirm that is the intended argument. */
+      const GLuint elts = ELTS_BUFSZ(nr_aos);
+      const GLuint index = INDEX_BUFSZ;
+      const GLuint vbuf = VBUF_BUFSZ;
+      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+	  || vbuf > index + elts)
+	space_required += vbuf;
+      else
+	space_required += index + elts;
+      space_required += AOS_BUFSZ(nr_aos);
+    }
+    space_required += SCISSOR_BUFSZ;
+  }
+  /* request the total now; the intent is that the buffer is flushed here if less than that is left */
+  rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__);
+}
+
 
 /**********************************************************************/
 /*                          Render pipeline stage                     */
@@ -482,6 +535,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
    /* Do the actual work:
     */
    radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+   r200EnsureEmitSize( ctx, vimap_rev );
    r200EmitArrays( ctx, vimap_rev );
 
    rmesa->tcl.Elts = VB->Elts;
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index b5b4fed8fa..20cf1f9a56 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -946,6 +946,29 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s
 	}
 }
 
+/**
+ * Count the total size of the next state emit by summing the check() results of the atoms it would cover.
+ **/
+GLuint radeonCountEmitSize(radeonContextPtr radeon)
+{
+   struct radeon_state_atom *atom;
+   int dwords = 0;
+   /* with a non-empty command stream (cdw != 0) and not all_dirty only dirty atoms are counted; otherwise every atom is */
+   if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+      if (!radeon->hw.is_dirty)
+	 return dwords;
+      foreach(atom, &radeon->hw.atomlist) {
+         if (atom->dirty)
+            dwords += atom->check(radeon->glCtx, atom);
+      }
+   } else {
+      foreach(atom, &radeon->hw.atomlist) {
+	 dwords += atom->check(radeon->glCtx, atom);
+      }
+   }
+   return dwords;
+}
+
 static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
 {
 	BATCH_LOCALS(radeon);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
index cebae18b2d..6e81100d66 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -24,6 +24,7 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa);
 void radeonFlush(GLcontext *ctx);
 void radeonFinish(GLcontext * ctx);
 void radeonEmitState(radeonContextPtr radeon);
+GLuint radeonCountEmitSize(radeonContextPtr radeon);
 
 void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
author	Pauli Nieminen <suokkos@gmail.com>	2009-08-11 23:43:35 +0300
committer	Pauli Nieminen <suokkos@gmail.com>	2009-08-21 19:12:29 +0300
commit	fb1d0bfd47fb8790e0b350a0fad7bc0af39e70f4 (patch)
tree	c30d9e7c9860338a1aca6fbddd57ceed2bb101b7
parent	7f8f486b36dc42a3818546c704321320fcdbc94b (diff)