From 7da98d7ebaf4475812f2ce44062d50bee393faf7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 9 Jan 2008 14:21:52 -0800
Subject: [965] Allow more than one draw_prims per batchbuffer.

The comment about (vbo)_exec_api.c appeared to be stale, as the VBO code seems
to only use non-named VBOs (not actual VBOs) or freshly-allocated VBO data.

This brings a 2x speedup to openarena, because we can submit nearly-full
batchbuffers instead of many 450-byte ones.
---
 src/mesa/drivers/dri/i965/brw_context.h |  1 +
 src/mesa/drivers/dri/i965/brw_draw.c    | 20 +++++++++++++-------
 src/mesa/drivers/dri/i965/brw_vtbl.c    |  3 +++
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'src/mesa/drivers/dri/i965')

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0ffc9f6238..8a71a4f500 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -414,6 +414,7 @@ struct brw_context
    GLboolean emit_state_always;
    GLboolean wrap;
    GLboolean tmp_fallback;
+   GLboolean no_batch_wrap;
 
    struct {
       struct brw_state_flags dirty;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 9225748aaa..887eebe475 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -270,12 +270,23 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    LOCK_HARDWARE(intel);
 
    if (brw->intel.numClipRects == 0) {
-      assert(intel->batch->ptr == intel->batch->map);
       UNLOCK_HARDWARE(intel);
       return GL_TRUE;
    }
 
    {
+      /* Flush the batch if it's approaching full, so that we don't wrap while
+       * we've got validated state that needs to be in the same batch as the
+       * primitives.  This fraction is just a guess (minimal full state plus
+       * a primitive is around 512 bytes), and would be better if we had
+       * an upper bound of how much we might emit in a single
+       * brw_try_draw_prims().
+       */
+      if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4)
+	 intel_batchbuffer_flush(intel->batch);
+
+      brw->no_batch_wrap = GL_TRUE;
+
       /* Set the first primitive early, ahead of validate_state:
        */
       brw_set_prim(brw, prim[0].mode);
@@ -310,12 +321,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
 
  out:
 
-   /* Currently have to do this to synchronize with the map/unmap of
-    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
-    * way around this, as not every flush is due to a buffer filling
-    * up.
-    */
-   intel_batchbuffer_flush( brw->intel.batch );
+   brw->no_batch_wrap = GL_FALSE;
 
    /* Free any old data so it doesn't clog up texture memory - we
     * won't be referencing it again.
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 277c8dca8b..e9fed4dae1 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -91,6 +91,9 @@ static void brw_new_batch( struct intel_context *intel )
 {
    struct brw_context *brw = brw_context(&intel->ctx);
 
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!brw->no_batch_wrap);
+
    /* Mark all context state as needing to be re-emitted.
     * This is probably not as severe as on 915, since almost all of our state
     * is just in referenced buffers.
-- 
cgit v1.2.3