From 59b2c2adbbece27ccf54e58b598ea29cb3a5aa85 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 24 Oct 2008 13:02:21 -0700 Subject: i965: Fix check_aperture calls to cover everything needed for the prim at once. Previously, since my check_aperture API change, we would check each piece of state against the batchbuffer individually, but not all the state against the batchbuffer at once. In addition to not being terribly useful in assuring success, it probably also increased CPU load by calling check_aperture many times per primitive. --- src/mesa/drivers/dri/i965/brw_context.c | 1 + src/mesa/drivers/dri/i965/brw_context.h | 21 ++++++---- src/mesa/drivers/dri/i965/brw_curbe.c | 10 +---- src/mesa/drivers/dri/i965/brw_draw.c | 32 ++++++++++++++- src/mesa/drivers/dri/i965/brw_draw_upload.c | 20 +++++++++- src/mesa/drivers/dri/i965/brw_misc_state.c | 59 ++++++++++++---------------- src/mesa/drivers/dri/i965/brw_queryobj.c | 8 +--- src/mesa/drivers/dri/i965/brw_state.h | 18 +++++++++ src/mesa/drivers/dri/i965/brw_state_upload.c | 45 +++++++++++---------- 9 files changed, 133 insertions(+), 81 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 474158b484..e2bc08a6cb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -39,6 +39,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" +#include "brw_state.h" #include "brw_vs.h" #include "intel_tex.h" #include "intel_blit.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1b823ab8ca..e3904be977 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -444,6 +444,19 @@ struct brw_context GLuint nr_draw_regions; struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. + */ + dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + int validated_bo_count; } state; struct brw_state_pointers attribs; @@ -678,14 +691,6 @@ void brw_prepare_query_begin(struct brw_context *brw); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); -/*====================================================================== - * brw_state.c - */ -void brw_validate_state( struct brw_context *brw ); -void brw_init_state( struct brw_context *brw ); -void brw_destroy_state( struct brw_context *brw ); - - /*====================================================================== * brw_state_dump.c */ diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 6ffa221d66..c7bac7b0c5 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw) dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } + brw_add_validated_bo(brw, brw->curbe.curbe_bo); /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the @@ -328,15 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - dri_bo *aper_array[] = { - brw->intel.batch->buf, - brw->curbe.curbe_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 05d6f42ab4..d87b8f8a84 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; + GLboolean warn = GL_FALSE; GLuint i; if (ctx->NewState) @@ -301,8 +302,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, */ brw_set_prim(brw, prim[0].mode); - /* XXX: Need to separate validate and upload of state. - */ brw_validate_state( brw ); /* Various fallback checks: @@ -313,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + static GLboolean warned; + intel_batchbuffer_flush(intel->batch); + + /* Validate the state after we flushed the batch (which would have + * changed the set of dirty state). If we still fail to + * check_aperture, warn of what's happening, but attempt to continue + * on since it may succeed anyway, and the user would probably rather + * see a failure and a warning than a fallback. + */ + brw_validate_state(brw); + if (!warned && + dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + warn = GL_TRUE; + warned = GL_TRUE; + } + } + + brw_upload_state(brw); + for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } @@ -323,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + if (warn) + fprintf(stderr, "i965: Single primitive emit potentially exceeded " + "available aperture space\n"); + if (!retval) DBG("%s failed\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 7b88b5eaa1..4080c5e322 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw, wrap_buffers(brw, size); } + assert(*bo_return == NULL); dri_bo_reference(brw->vb.upload.bo); *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; } @@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw) input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; } else { + if (input->bo != NULL) { + /* Already-uploaded vertex data is present from a previous + * prepare_vertices, but we had to re-validate state due to + * check_aperture failing and a new batch being produced. + */ + continue; + } + /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ @@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw) } brw_prepare_query_begin(brw); + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + + brw_add_validated_bo(brw, input->bo); + } } static void brw_emit_vertices(struct brw_context *brw) @@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw) struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint ib_size; - dri_bo *bo; + dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; @@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo_unreference(brw->ib.bo); brw->ib.bo = bo; brw->ib.offset = offset; + + brw_add_validated_bo(brw, brw->ib.bo); } static void brw_emit_indices(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index e347dd2244..5bba8c84ec 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -98,6 +98,11 @@ const struct brw_tracked_state brw_drawing_rect = { .emit = upload_drawing_rect }; +static void prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.bind_bo); +} + /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. @@ -108,15 +113,6 @@ const struct brw_tracked_state brw_drawing_rect = { static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->wm.bind_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -136,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_SURF_BIND, }, + .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -169,23 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_PSP; } -static void upload_psp_urb_cbs(struct brw_context *brw ) + +static void prepare_psp_urb_cbs(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->vs.state_bo, - brw->gs.state_bo, - brw->clip.state_bo, - brw->wm.state_bo, - brw->cc.state_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); +} +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); brw_upload_constant_buffer_state(brw); @@ -203,9 +195,18 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, + .prepare = prepare_psp_urb_cbs, .emit = upload_psp_urb_cbs, }; +static void prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + + if (region != NULL) + brw_add_validated_bo(brw, region->buffer); +} + static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -227,10 +228,6 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { unsigned int format; - dri_bo *aper_array[] = { - intel->batch->buf, - region->buffer - }; switch (region->cpp) { case 2: @@ -247,11 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw) return; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } - BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -280,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = { .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, .cache = 0, }, + .prepare = prepare_depthbuffer, .emit = emit_depthbuffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index a1a1353dee..cb9169e2ee 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -42,6 +42,7 @@ #include "main/imports.h" #include "brw_context.h" +#include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_reg.h" @@ -163,10 +164,6 @@ void brw_prepare_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->query.bo, - }; /* Skip if we're not doing any queries. */ if (is_empty_list(&brw->query.active_head)) @@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw) brw->query.index = 0; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); + brw_add_validated_bo(brw, brw->query.bo); } /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index a27ef1f8bc..bb22c03eeb 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -35,6 +35,16 @@ #include "brw_context.h" +static inline void +brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) +{ + assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); + + if (bo != NULL) { + dri_bo_reference(bo); + brw->state.validated_bos[brw->state.validated_bo_count++] = bo; + } +}; const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; @@ -83,6 +93,14 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +/*********************************************************************** + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + /*********************************************************************** * brw_state_cache.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 8d57daf3ab..16b0496f47 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -169,6 +169,18 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } +static void +brw_clear_validated_bos(struct brw_context *brw) +{ + int i; + + /* Clear the last round of validated bos */ + for (i = 0; i < brw->state.validated_bo_count; i++) { + dri_bo_unreference(brw->state.validated_bos[i]); + brw->state.validated_bos[i] = NULL; + } + brw->state.validated_bo_count = 0; +} /*********************************************************************** * Emit all state: @@ -177,12 +189,15 @@ void brw_validate_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; - GLuint i, count, pass = 0; - dri_bo *last_batch_bo = NULL; + GLuint i; + + brw_clear_validated_bos(brw); state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; + brw_add_validated_bo(brw, intel->batch->buf); + if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; @@ -208,8 +223,6 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = 0; - count = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = brw->state.atoms[i]; @@ -223,19 +236,15 @@ void brw_validate_state( struct brw_context *brw ) } } } +} - if (brw->intel.Fallback) - return; - /* We're about to try to set up a coherent state in the batchbuffer for - * the emission of primitives. If we exceed the aperture size in any of the - * emit() calls, we need to go back to square 1 and try setting up again. - */ -got_flushed: - dri_bo_unreference(last_batch_bo); - last_batch_bo = intel->batch->buf; - dri_bo_reference(last_batch_bo); - assert(pass++ <= 2); +void brw_upload_state(struct brw_context *brw) +{ + struct brw_state_flags *state = &brw->state.dirty; + int i; + + brw_clear_validated_bos(brw); if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -260,8 +269,6 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } @@ -286,15 +293,11 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } } } - dri_bo_unreference(last_batch_bo); - if (!brw->intel.Fallback) memset(state, 0, sizeof(*state)); } -- cgit v1.2.3