From 0c8f8d3dc9d60ed34eeca7f3606651420a81753c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Sep 2004 06:27:02 +0000 Subject: The previous code would emit a full set of state during the first EmitState on a new cmdbuf, to ensure that state wasn't lost across UNLOCK/LOCK pairs (in the case of context switching). This was rather inefficient. Instead, after flushing a cmdbuf, mark the state as needing to be saved on UNLOCK. Then, at the beginning of flushing a cmdbuf, if we actually have lost the context, go back and emit a new cmdbuf with the full set of state, before continuing with the cmdbuf flush. Also, remove the dirty/clean atom lists, since atoms are emitted in a fixed order these days, and go with a simpler single list. Provides a 14% improvement in ipers performance in my tests, along with other apps. --- src/mesa/drivers/dri/r200/r200_cmdbuf.c | 162 +++++++++++++--------------- src/mesa/drivers/dri/r200/r200_context.c | 2 +- src/mesa/drivers/dri/r200/r200_context.h | 9 +- src/mesa/drivers/dri/r200/r200_ioctl.c | 86 +++++++++++---- src/mesa/drivers/dri/r200/r200_ioctl.h | 9 +- src/mesa/drivers/dri/r200/r200_lock.c | 2 + src/mesa/drivers/dri/r200/r200_lock.h | 4 + src/mesa/drivers/dri/r200/r200_state_init.c | 19 ++-- 8 files changed, 168 insertions(+), 125 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index fa0c62385b..e3829f9a95 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -58,113 +58,105 @@ static void print_state_atom( struct r200_state_atom *state ) } -static void r200_emit_state_list( r200ContextPtr rmesa, - struct r200_state_atom *list ) +/* The state atoms will be emitted in the order they appear in the atom list, + * so this step is important. + */ +void r200SetUpAtomList( r200ContextPtr rmesa ) { - struct r200_state_atom *state, *tmp; - char *dest; - int i, size, mtu; - - size = 0; - foreach_s( state, tmp, list ) { - if (state->check( rmesa->glCtx, state->idx )) { -/* dest = r200AllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__); - memcpy( dest, state->cmd, state->cmd_size * 4);*/ - size += state->cmd_size; - state->dirty = GL_TRUE; - move_to_head( &(rmesa->hw.clean), state ); - if (R200_DEBUG & DEBUG_STATE) - print_state_atom( state ); - } - else if (R200_DEBUG & DEBUG_STATE) - fprintf(stderr, "skip state %s\n", state->name); - } - - if (!size) - return; + int i, mtu; - dest = r200AllocCmdBuf( rmesa, size * 4, __FUNCTION__); mtu = rmesa->glCtx->Const.MaxTextureUnits; -#define EMIT_ATOM(ATOM) \ -do { \ - if (rmesa->hw.ATOM.dirty) { \ - rmesa->hw.ATOM.dirty = GL_FALSE; \ - memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \ - dest += rmesa->hw.ATOM.cmd_size * 4; \ - } \ -} while (0) - - EMIT_ATOM (ctx); - EMIT_ATOM (set); - EMIT_ATOM (lin); - EMIT_ATOM (msk); - EMIT_ATOM (vpt); - EMIT_ATOM (vtx); - EMIT_ATOM (vap); - EMIT_ATOM (vte); - EMIT_ATOM (msc); - EMIT_ATOM (cst); - EMIT_ATOM (zbs); - EMIT_ATOM (tcl); - EMIT_ATOM (msl); - EMIT_ATOM (tcg); - EMIT_ATOM (grd); - EMIT_ATOM (fog); - EMIT_ATOM (tam); - EMIT_ATOM (tf); - for (i = 0; i < mtu; ++i) { - EMIT_ATOM (tex[i]); - } - for (i = 0; i < mtu; ++i) { - EMIT_ATOM (cube[i]); - } + make_empty_list(&rmesa->hw.atomlist); + rmesa->hw.atomlist.name = "atom-list"; + + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf ); + for (i = 0; i < mtu; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] ); + for (i = 0; i < mtu; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] ); for (i = 0; i < 3 + mtu; ++i) - EMIT_ATOM (mat[i]); - EMIT_ATOM (eye); - EMIT_ATOM (glt); - for (i = 0; i < 2; ++i) { - EMIT_ATOM (mtl[i]); - } + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt ); + for (i = 0; i < 2; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] ); for (i = 0; i < 8; ++i) - EMIT_ATOM (lit[i]); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] ); for (i = 0; i < 6; ++i) - EMIT_ATOM (ucp[i]); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] ); for (i = 0; i < 6; ++i) - EMIT_ATOM (pix[i]); - -#undef EMIT_ATOM - + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] ); } - void r200EmitState( r200ContextPtr rmesa ) { - struct r200_state_atom *state, *tmp; + char *dest; + int i, mtu; + struct r200_state_atom *atom; if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) fprintf(stderr, "%s\n", __FUNCTION__); - /* Somewhat overkill: - */ - if ( rmesa->lost_context) { - if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL)) - fprintf(stderr, "%s - lost context\n", __FUNCTION__); + if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) + return; - foreach_s( state, tmp, &(rmesa->hw.clean) ) - move_to_tail(&(rmesa->hw.dirty), state ); + mtu = rmesa->glCtx->Const.MaxTextureUnits; - rmesa->lost_context = 0; + /* To avoid going across the entire set of states multiple times, just check + * for enough space for the case of emitting all state, and inline the + * r200AllocCmdBuf code here without all the checks. + */ + dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; + r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size ); + + if (R200_DEBUG & DEBUG_STATE) { + foreach( atom, &rmesa->hw.atomlist ) { + if ( atom->dirty || rmesa->hw.all_dirty ) { + if ( atom->check( rmesa->glCtx, atom->idx ) ) + print_state_atom( atom ); + else + fprintf(stderr, "skip state %s\n", atom->name); + } + } } -/* else { - move_to_tail( &rmesa->hw.dirty, &rmesa->hw.mtl[0] );*/ - /* odd bug? -- isosurf, cycle between reflect & lit */ -/* }*/ - r200_emit_state_list( rmesa, &rmesa->hw.dirty ); -} + foreach( atom, &rmesa->hw.atomlist ) { + if ( rmesa->hw.all_dirty ) + atom->dirty = GL_TRUE; + if ( atom->dirty ) { + if ( atom->check( rmesa->glCtx, atom->idx ) ) { + int size = atom->cmd_size * 4; + memcpy( dest, atom->cmd, size); + dest += size; + rmesa->store.cmd_used += size; + atom->dirty = GL_FALSE; + } + } + } + assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); + rmesa->hw.is_dirty = GL_FALSE; + rmesa->hw.all_dirty = GL_FALSE; +} /* Fire a section of the retained (indexed_verts) buffer as a regular * primtive. diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 4151b1598e..c5fdc86a29 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -323,7 +323,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; rmesa->swtcl.RenderIndex = ~0; - rmesa->lost_context = 1; + rmesa->hw.all_dirty = 1; /* Set the maximum texture size small enough that we can guarentee that * all texture units can bind a maximal texture and have them both in diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index f000e14330..25faec955c 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -191,6 +191,7 @@ struct r200_state_atom { GLuint idx; int *cmd; /* one or more cmd's */ int *lastcmd; /* one or more cmd's */ + int *savedcmd; /* one or more cmd's */ GLboolean dirty; GLboolean (*check)( GLcontext *, int ); /* is this state active? */ }; @@ -491,10 +492,8 @@ struct r200_state_atom { struct r200_hw_state { - /* All state should be on one of these lists: - */ - struct r200_state_atom dirty; /* dirty list head placeholder */ - struct r200_state_atom clean; /* clean list head placeholder */ + /* Head of the linked list of state atoms. */ + struct r200_state_atom atomlist; /* Hardware state, stored as cmdbuf commands: * -- Need to doublebuffer for @@ -530,6 +529,7 @@ struct r200_hw_state { struct r200_state_atom glt; int max_state_size; /* Number of bytes necessary for a full state emit. */ + GLboolean is_dirty, all_dirty; }; struct r200_state { @@ -876,6 +876,7 @@ struct r200_context { drm_clip_rect_t *pClipRects; unsigned int lastStamp; GLboolean lost_context; + GLboolean save_on_next_unlock; r200ScreenPtr r200Screen; /* Screen private DRI data */ drm_radeon_sarea_t *sarea; /* Private SAREA data */ diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index fb462e0850..955afb0d5f 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -58,12 +58,71 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. static void r200WaitForIdle( r200ContextPtr rmesa ); +void r200SaveHwState( r200ContextPtr rmesa ) +{ + struct r200_state_atom *atom; + + foreach( atom, &rmesa->hw.atomlist ) + memcpy(atom->savedcmd, atom->cmd, atom->cmd_size * 4); +} + +static void r200SwapHwState( r200ContextPtr rmesa ) +{ + int *temp; + struct r200_state_atom *atom; + + foreach( atom, &rmesa->hw.atomlist ) { + temp = atom->cmd; + atom->cmd = atom->savedcmd; + atom->savedcmd = temp; + } +} + +/* At this point we were in FlushCmdBufLocked but we had lost our context, so + * we need to unwire our current cmdbuf and hook a new one in, emit that, then + * wire the old cmdbuf back in so that FlushCmdBufLocked can continue and the + * buffer can depend on the state not being lost across lock/unlock. + */ +static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa ) +{ + GLuint nr_released_bufs; + struct r200_store store; + struct r200_hw_state temp_state; + static int count = 0; + + rmesa->lost_context = GL_FALSE; + + nr_released_bufs = rmesa->dma.nr_released_bufs; + store = rmesa->store; + rmesa->store.statenr = 0; + rmesa->store.primnr = 0; + rmesa->store.cmd_used = 0; + rmesa->store.elts_start = 0; + rmesa->hw.all_dirty = GL_TRUE; + r200SwapHwState( rmesa ); + /* In this case it's okay to EmitState while locked because we won't exhaust + * our (empty) cmdbuf. + */ + r200EmitState( rmesa ); + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + + r200SwapHwState( rmesa ); + /* We've just cleared out the dirty flags, so we don't remember what + * actually needed to be emitted for the next state emit. + */ + rmesa->hw.all_dirty = GL_TRUE; + rmesa->dma.nr_released_bufs = nr_released_bufs; + rmesa->store = store; +} int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ) { int ret, i; drm_radeon_cmd_buffer_t cmd; + if (rmesa->lost_context) + r200BackUpAndEmitLostStateLocked( rmesa ); + if (R200_DEBUG & DEBUG_IOCTL) { fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); @@ -132,18 +191,7 @@ int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ) rmesa->store.statenr = 0; rmesa->store.cmd_used = 0; rmesa->dma.nr_released_bufs = 0; - /* Set lost_context so that the first state emit on the new buffer is a full - * one. This is because the context might get lost while preparing the next - * buffer, and when we lock and find out, we don't have the information to - * recreate the state. This function should always be called before the new - * buffer is begun, so it's sufficient to just set lost_context here. - * - * The alternative to this would be to copy out the state on unlock - * (approximately) and if we did lose the context, dispatch a cmdbuf to reset - * the state to that old copy before continuing with the accumulated command - * buffer. - */ - rmesa->lost_context = 1; + rmesa->save_on_next_unlock = 1; return ret; } @@ -464,7 +512,7 @@ void r200CopyBuffer( const __DRIdrawablePrivate *dPriv ) } UNLOCK_HARDWARE( rmesa ); - rmesa->lost_context = 1; + rmesa->hw.all_dirty = GL_TRUE; rmesa->swap_count++; (*rmesa->get_ust)( & ust ); @@ -613,13 +661,6 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all, cx += dPriv->x; cy = dPriv->y + dPriv->h - cy - ch; - /* We have to emit state along with the clear, since the kernel relies on - * some of it. The EmitState that was above R200_FIREVERTICES was an - * attempt to do that, except that another context may come in and cause us - * to lose our context while we're unlocked. - */ - r200EmitState( rmesa ); - LOCK_HARDWARE( rmesa ); /* Throttle the number of clear ioctls we do. @@ -722,7 +763,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all, } UNLOCK_HARDWARE( rmesa ); - rmesa->lost_context = 1; + rmesa->hw.all_dirty = GL_TRUE; } @@ -763,8 +804,7 @@ void r200Flush( GLcontext *ctx ) if (rmesa->dma.flush) rmesa->dma.flush( rmesa ); - if (!is_empty_list(&rmesa->hw.dirty)) - r200EmitState( rmesa ); + r200EmitState( rmesa ); if (rmesa->store.cmd_used) r200FlushCmdBuf( rmesa, __FUNCTION__ ); diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h index 1503df7075..57474d7a05 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.h +++ b/src/mesa/drivers/dri/r200/r200_ioctl.h @@ -117,6 +117,9 @@ extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, extern GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ); +void r200SaveHwState( r200ContextPtr rmesa ); +void r200SetUpAtomList( r200ContextPtr rmesa ); + /* ================================================================ * Helper macros: */ @@ -135,7 +138,8 @@ do { \ #define R200_STATECHANGE( rmesa, ATOM ) \ do { \ R200_NEWPRIM( rmesa ); \ - move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ + rmesa->hw.is_dirty = GL_TRUE; \ } while (0) #define R200_DB_STATE( ATOM ) \ @@ -149,7 +153,8 @@ static __inline int R200_DB_STATECHANGE( if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { int *tmp; R200_NEWPRIM( rmesa ); - move_to_head( &(rmesa->hw.dirty), atom ); + atom->dirty = GL_TRUE; + rmesa->hw.is_dirty = GL_TRUE; tmp = atom->cmd; atom->cmd = atom->lastcmd; atom->lastcmd = tmp; diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c index 1700d31e16..539a4be619 100644 --- a/src/mesa/drivers/dri/r200/r200_lock.c +++ b/src/mesa/drivers/dri/r200/r200_lock.c @@ -116,4 +116,6 @@ void r200GetLock( r200ContextPtr rmesa, GLuint flags ) for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] ); } + + rmesa->lost_context = GL_TRUE; } diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h index 587e4fe5cc..908052659c 100644 --- a/src/mesa/drivers/dri/r200/r200_lock.h +++ b/src/mesa/drivers/dri/r200/r200_lock.h @@ -104,6 +104,10 @@ extern int prevLockLine; rmesa->dri.hwLock, \ rmesa->dri.hwContext ); \ DEBUG_RESET(); \ + if (rmesa->save_on_next_unlock) { \ + r200SaveHwState( rmesa ); \ + rmesa->save_on_next_unlock = GL_FALSE; \ + } \ } while (0) #endif diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 3b6893aeee..e97a2f4d99 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -65,8 +65,9 @@ void r200PrintDirty( r200ContextPtr rmesa, const char *msg ) fprintf(stderr, msg); fprintf(stderr, ": "); - foreach(l, &(rmesa->hw.dirty)) { - fprintf(stderr, "%s, ", l->name); + foreach(l, &rmesa->hw.atomlist) { + if (l->dirty || rmesa->hw.all_dirty) + fprintf(stderr, "%s, ", l->name); } fprintf(stderr, "\n"); @@ -200,11 +201,6 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset; rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch; - /* Initialize lists: - */ - make_empty_list(&(rmesa->hw.dirty)); rmesa->hw.dirty.name = "DIRTY"; - make_empty_list(&(rmesa->hw.clean)); rmesa->hw.clean.name = "CLEAN"; - rmesa->hw.max_state_size = 0; #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \ @@ -212,10 +208,11 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.ATOM.cmd_size = SZ; \ rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.savedcmd = (int *)CALLOC(SZ * sizeof(int)); \ rmesa->hw.ATOM.name = NM; \ rmesa->hw.ATOM.idx = IDX; \ - rmesa->hw.ATOM.check = check_##CHK; \ - insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \ + rmesa->hw.ATOM.check = check_##CHK; \ + rmesa->hw.ATOM.dirty = GL_FALSE; \ rmesa->hw.max_state_size += SZ * sizeof(int); \ } while (0) @@ -308,6 +305,7 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( pix[4], tex, PIX_STATE_SIZE, "PIX/pixstage-4", 4 ); ALLOC_STATE( pix[5], tex, PIX_STATE_SIZE, "PIX/pixstage-5", 5 ); + r200SetUpAtomList( rmesa ); /* Fill in the packet headers: */ @@ -772,5 +770,6 @@ void r200InitState( r200ContextPtr rmesa ) r200LightingSpaceChange( ctx ); - rmesa->lost_context = 1; + r200SaveHwState( rmesa ); + rmesa->hw.all_dirty = GL_TRUE; } -- cgit v1.2.3