From 503eb57a003f51c25687e9cf0ad6f7939a757f1b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 8 Jun 2010 13:55:53 -0700 Subject: i965: Avoid calloc/free in the CURBE upload process. In exchange we end up with an extra memcpy, but that seems better than calloc/free. Each buffer is 4k maximum, and on the i965-streaming branch this allocation was showing up as the top entry in brw_validate_state profiling for cairo-gl. --- src/mesa/drivers/dri/i965/brw_context.c | 3 +++ src/mesa/drivers/dri/i965/brw_context.h | 11 +++++++++++ src/mesa/drivers/dri/i965/brw_curbe.c | 24 +++++++++--------------- src/mesa/drivers/dri/i965/brw_state_cache.c | 5 ----- src/mesa/drivers/dri/i965/brw_vtbl.c | 3 +++ 5 files changed, 26 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index dc4bd5802d..0be60ad83c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -184,6 +184,9 @@ GLboolean brwCreateContext( int api, brw_init_state( brw ); + brw->curbe.last_buf = calloc(1, 4096); + brw->curbe.next_buf = calloc(1, 4096); + brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 14552fa272..d97634c1c6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -573,7 +573,18 @@ struct brw_context /** Offset within curbe_bo of space for next curbe entry */ GLuint curbe_next_offset; + /** + * Copy of the last set of CURBEs uploaded. Frequently we'll end up + * in brw_curbe.c with the same set of constant data to be uploaded, + * so we'd rather not upload new constants in that case (it can cause + * a pipeline bubble since only up to 4 can be pipelined at a time). + */ GLfloat *last_buf; + /** + * Allocation for where to calculate the next set of CURBEs. + * It's a hot enough path that malloc/free of that data matters. + */ + GLfloat *next_buf; GLuint last_bufsz; } curbe; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 06053d5bcb..c79b0a79e5 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -190,15 +190,11 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint i; if (sz == 0) { - if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - brw->curbe.last_bufsz = 0; - } + brw->curbe.last_bufsz = 0; return; } - buf = (GLfloat *) calloc(1, bufsz); + buf = brw->curbe.next_buf; /* fragment shader constants */ if (brw->curbe.wm_size) { @@ -289,18 +285,15 @@ static void prepare_constant_buffer(struct brw_context *brw) } if (brw->curbe.curbe_bo != NULL && - brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - free(buf); - } - else { - /* constants have changed */ - if (brw->curbe.last_buf) - free(brw->curbe.last_buf); - - brw->curbe.last_buf = buf; + } else { + /* Update the record of what our last set of constants was. We + * don't just flip the pointers because we don't fill in the + * data in the padding between the entries. + */ + memcpy(brw->curbe.last_buf, buf, bufsz); brw->curbe.last_bufsz = bufsz; if (brw->curbe.curbe_bo != NULL && @@ -319,6 +312,7 @@ static void prepare_constant_buffer(struct brw_context *brw) 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); + assert(bufsz < 4096); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 415b645665..ea81ad1341 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -447,11 +447,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; - if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - } - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index c8ebb4a4f1..a02e958c5e 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -105,6 +105,9 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.blend_state_bo); dri_bo_release(&brw->cc.depth_stencil_state_bo); dri_bo_release(&brw->cc.color_calc_state_bo); + + free(brw->curbe.last_buf); + free(brw->curbe.next_buf); } -- cgit v1.2.3