summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2008-01-10 14:43:16 -0800
committer: Eric Anholt <eric@anholt.net> 2008-01-10 14:43:16 -0800
commit: 609ad99a1a4b3a59436c520b355f482dff64b34a (patch)
tree: 487588f4396a3d54acdf745e98904a052fb166d9 /src/mesa/drivers/dri/i965
parent: 4e5b3626a1fc42ff7a88264ded8f0997b0fcd22e (diff)
[965] Improve performance by allocating CURBE buffers a page at a time.
Since each CURBE is only about 64 bytes, and kernel allocations are made a page at a time anyway, this lets us reduce the number of buffer allocations by packing many CURBEs into one buffer for each batchbuffer submitted. Improves openarena performance by around 10%.
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c 35
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 3
3 files changed, 33 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 9ddd41d3f3..7a2073d7c1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -538,6 +538,10 @@ struct brw_context
struct brw_tracked_state tracked_state;
dri_bo *curbe_bo;
+ /** Offset within curbe_bo of space for current curbe entry */
+ GLuint curbe_offset;
+ /** Offset within curbe_bo of space for next curbe entry */
+ GLuint curbe_next_offset;
GLfloat *last_buf;
GLuint last_bufsz;
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 2e39ec4805..f41f659b33 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -286,7 +286,8 @@ static void upload_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
}
- if (brw->curbe.last_buf &&
+ if (brw->curbe.curbe_bo != NULL &&
+ brw->curbe.last_buf &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
free(buf);
@@ -297,16 +298,32 @@ static void upload_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
- dri_bo_unreference(brw->curbe.curbe_bo);
- brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
- bufsz, 1 << 6,
- DRM_BO_FLAG_MEM_LOCAL |
- DRM_BO_FLAG_CACHED |
- DRM_BO_FLAG_CACHED_MAPPED);
+ if (brw->curbe.curbe_bo != NULL &&
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
+ {
+ dri_bo_unreference(brw->curbe.curbe_bo);
+ brw->curbe.curbe_bo = NULL;
+ }
+
+ if (brw->curbe.curbe_bo == NULL) {
+ /* Allocate a single page for CURBE entries for this batchbuffer.
+ * They're generally around 64b.
+ */
+ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+ 4096, 1 << 6,
+ DRM_BO_FLAG_MEM_LOCAL |
+ DRM_BO_FLAG_CACHED |
+ DRM_BO_FLAG_CACHED_MAPPED);
+ brw->curbe.curbe_next_offset = 0;
+ }
+
+ brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+ brw->curbe.curbe_next_offset += bufsz;
+ brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
/* Copy data to the buffer:
*/
- dri_bo_subdata(brw->curbe.curbe_bo, 0, bufsz, buf);
+ dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
/* Because this provokes an action (ie copy the constants into the
@@ -325,7 +342,7 @@ static void upload_constant_buffer(struct brw_context *brw)
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- (sz - 1));
+ (sz - 1) + brw->curbe.curbe_offset);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index e9fed4dae1..126e655839 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -94,6 +94,9 @@ static void brw_new_batch( struct intel_context *intel )
/* Check that we didn't just wrap our batchbuffer at a bad time. */
assert(!brw->no_batch_wrap);
+ dri_bo_unreference(brw->curbe.curbe_bo);
+ brw->curbe.curbe_bo = NULL;
+
/* Mark all context state as needing to be re-emitted.
* This is probably not as severe as on 915, since almost all of our state
* is just in referenced buffers.