From 817dcdd280cd749c3186bd3f00c06f41270aa884 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:07:02 -0600 Subject: i965: use new _NEW_PROGRAM_CONSTANTS flag instead of dynamic flags --- src/mesa/drivers/dri/i965/brw_curbe.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 18b187ed1d..3c81899672 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -188,13 +188,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -422,7 +415,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ -- cgit v1.2.3 From 6b6a23c0f7e042d71764a2028f3d33b59076ac7c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:07:46 -0600 Subject: i965: updates to some debug code --- src/mesa/drivers/dri/i965/brw_curbe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 3c81899672..da746e4aa0 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ 
b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -352,11 +352,7 @@ update_constant_buffer(struct brw_context *brw, dri_bo_unmap(const_buffer); if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } + _mesa_print_parameter_list(params); } } } @@ -369,7 +365,7 @@ update_vertex_constant_buffer(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp); printf("program %u\n", vp->program.Base.Id); } update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); @@ -382,6 +378,10 @@ update_fragment_constant_buffer(struct brw_context *brw) { struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; + if (0) { + printf("update WM constants in buffer %p\n", fp->const_buffer); + printf("program %u\n", fp->program.Base.Id); + } update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); } -- cgit v1.2.3 From 1dbab84e21cad81e971265db3dbc8dc6c344b340 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:14:16 -0600 Subject: i965: use _NEW_PROGRAM_CONSTANTS and always create new const buffers When program constants change we create a new VS constant buffer instead of re-using the old one. This allows us to have several const buffers in flight with vertex rendering. 
--- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 71840d1e4e..89c456e62c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -456,17 +456,14 @@ brw_update_vs_constant_surface( GLcontext *ctx, assert(surf == 0); - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* We always create a new VS constant buffer so that several can be + * in flight at a time. Free the old one first... + */ + dri_bo_unreference(const_buffer); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } + /* alloc new buffer */ + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); memset(&key, 0, sizeof(key)); @@ -783,8 +780,7 @@ brw_vs_get_binding_table(struct brw_context *brw) /** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. + * Vertex shader surfaces (constant buffer). 
*/ static void prepare_vs_surfaces(struct brw_context *brw ) { @@ -820,8 +816,12 @@ prepare_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS | + _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_CONTEXT), .cache = 0 }, .prepare = prepare_surfaces, -- cgit v1.2.3 From e5681fc176bc43bc6c7804bd1e8d8557cdcab345 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:16:21 -0600 Subject: i965: add _NEW_PROGRAM_CONSTANTS to mesa_bits[] list --- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e61..197efeb1b7 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -218,6 +218,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; -- cgit v1.2.3 From f428255bde93a452a7cdd48fba21839c99beb6cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:23:15 -0600 Subject: i965: the brw_constant_buffer state atom is no longer dynamic No more dynamic atoms so we can simplify the state validation code a little. 
--- src/mesa/drivers/dri/i965/brw_context.h | 7 ------- src/mesa/drivers/dri/i965/brw_state_upload.c | 31 +++++----------------------- 2 files changed, 5 insertions(+), 33 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a0b3b06309..af9fef5e22 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -448,8 +448,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -553,11 +551,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 197efeb1b7..491e2e2452 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -102,39 +102,18 @@ const struct brw_tracked_state *atoms[] = &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); } void brw_destroy_state( struct brw_context *brw ) { - if 
(brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - brw_destroy_cache(brw); brw_destroy_batch_cache(brw); } @@ -337,7 +316,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -368,8 +347,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -398,7 +377,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; -- cgit v1.2.3 From a071a8d2e72e52e6a8906448b171756c8920ce96 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 10:44:36 -0600 Subject: i965: remove unused state atom entries --- src/mesa/drivers/dri/i965/brw_state_upload.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 491e2e2452..20892cdf32 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,7 +59,6 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, @@ -88,15 +87,8 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. 
- */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, -- cgit v1.2.3 From f9af97c7a5d81226a87d79baf8fb00231c96398d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:08:46 -0600 Subject: i965: checkpoint commit: use two state caches instead of one The new, second cache will only be used for surface-related items. Since we can create many surfaces the original, single cache could get filled quickly. When we cleared it, we had to regenerate shaders, etc. With two caches, we can avoid doing that. --- src/mesa/drivers/dri/i965/brw_context.h | 3 +- src/mesa/drivers/dri/i965/brw_state.h | 4 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 122 ++++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_state_upload.c | 4 +- 4 files changed, 88 insertions(+), 45 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index af9fef5e22..cad711d18a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -467,7 +467,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998..7ea2fc113c 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -135,8 +135,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); 
/*********************************************************************** * brw_state_batch.c diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b5166406..3b23a8b755 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -320,20 +320,20 @@ enum pool_type { }; static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache( struct brw_context *brw ) { struct brw_cache *cache = &brw->cache; @@ -342,114 +342,145 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + 
brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +#if 1 + brw_init_cache_id(cache, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(cache, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); +#endif +} + +static void +brw_init_surface_cache( struct brw_context *brw ) +{ + struct brw_cache *cache = &brw->surface_cache; - brw_init_cache_id(brw, + cache->brw = brw; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } +void brw_init_caches( struct brw_context *brw ) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + static void -brw_clear_cache( struct brw_context *brw ) 
+brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +488,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +499,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -489,19 +520,30 @@ void brw_state_cache_check_size( struct brw_context *brw ) * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + +void brw_destroy_caches( struct brw_context *brw ) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 20892cdf32..2641bcb2aa 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -100,13 +100,13 @@ const struct brw_tracked_state *atoms[] = void 
brw_init_state( struct brw_context *brw ) { - brw_init_cache(brw); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } -- cgit v1.2.3 From 4843e54fc69daf379dea9899673b3df92b44049c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:12:07 -0600 Subject: i965: actually use the new, second surface state cache --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 39 +++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89c456e62c..74f3f1791e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -268,7 +268,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +321,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -362,7 +363,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? 
*/ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -427,7 +428,8 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); @@ -484,7 +486,8 @@ brw_update_vs_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); @@ -563,10 +566,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = NULL; if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &region_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &region_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -591,7 +595,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. 
*/ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &region_bo, 1, &surf, sizeof(surf), @@ -623,7 +628,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -639,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -739,7 +744,7 @@ brw_vs_get_binding_table(struct brw_context *brw) assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, brw->vs.nr_surfaces, NULL); @@ -755,7 +760,7 @@ brw_vs_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, brw->vs.nr_surfaces, data, data_size, -- cgit v1.2.3 From c0c58cf5cfc11b9256287871660cc16966e662ef Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:28:24 -0600 Subject: i965: comments, reformatting --- src/mesa/drivers/dri/i965/brw_state_cache.c | 55 ++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 3b23a8b755..cbae68798c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect 
program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. 
*/ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. 
* @@ -319,6 +330,7 @@ enum pool_type { DW_GENERAL_STATE }; + static void brw_init_cache_id(struct brw_cache *cache, const char *name, @@ -333,7 +345,7 @@ brw_init_cache_id(struct brw_cache *cache, static void -brw_init_non_surface_cache( struct brw_context *brw ) +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -433,6 +445,7 @@ brw_init_non_surface_cache( struct brw_context *brw ) BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); + #if 1 brw_init_cache_id(cache, "SS_SURFACE", @@ -448,8 +461,9 @@ brw_init_non_surface_cache( struct brw_context *brw ) #endif } + static void -brw_init_surface_cache( struct brw_context *brw ) +brw_init_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->surface_cache; @@ -473,14 +487,17 @@ brw_init_surface_cache( struct brw_context *brw ) 0); } -void brw_init_caches( struct brw_context *brw ) + +void +brw_init_caches(struct brw_context *brw) { brw_init_non_surface_cache(brw); brw_init_surface_cache(brw); } + static void -brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -514,7 +531,9 @@ brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. 
@@ -542,7 +561,9 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) cache->size = 0; } -void brw_destroy_caches( struct brw_context *brw ) + +void +brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, &brw->cache); brw_destroy_cache(brw, &brw->surface_cache); -- cgit v1.2.3 From 21a422d97e501f4ca68ab24ad3fe5f5eb1393349 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:29:49 -0600 Subject: i965: remove old code to init surface-related cache IDs These types are only found in the new surface state cache now. --- src/mesa/drivers/dri/i965/brw_state_cache.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index cbae68798c..320d886c99 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -445,20 +445,6 @@ brw_init_non_surface_cache(struct brw_context *brw) BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); - -#if 1 - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0); - - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0, - 0); -#endif } -- cgit v1.2.3 From 5c8fb6acc10662c9e71078c9f273db6c7808e9ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:47:59 -0600 Subject: i965: define BRW_MAX_GRF --- src/mesa/drivers/dri/i965/brw_context.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cad711d18a..f0d4993e11 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -243,6 +243,9 @@ struct brw_vs_ouput_sizes { }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + /** Number of texture sampler units */ 
#define BRW_MAX_TEX_UNIT 16 -- cgit v1.2.3 From ac22178eb049126003db40b0a77a111498a12ab7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:52:16 -0600 Subject: i965: enable VS constant buffers In the VS constants can now be handled in two different ways: 1. If there's room in the GRF, put constants there. They're preloaded from the CURBE prior to VS execution. This is the historical approach. The problem is the GRF may not have room for all the shader's constants and temps and misc registers. Hence... 2. Use a separate constant buffer which is read from using a READ message. This allows a very large number of constants and frees up GRF regs for shader temporaries. This is the new approach. May be a little slower than 1. 1 vs. 2 is chosen according to how many constants and temps the shader needs. --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 524f1211ce..1da5a3f502 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... 
+ */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->use_const_buffer = GL_TRUE; else -#endif c->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + printf("use_const_buffer = %d\n", c->use_const_buffer); /* r0 -- reserved as usual */ -- cgit v1.2.3 From ebfbd8c4fef78e3cd9604660e5bb96e3c6df07e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 13:46:58 -0600 Subject: i965: disable debug printf --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 1da5a3f502..c2b3702798 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -79,7 +79,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->use_const_buffer = GL_TRUE; else c->use_const_buffer = GL_FALSE; - printf("use_const_buffer = %d\n", c->use_const_buffer); + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ -- cgit v1.2.3 From b58b3a786aa38dcc9d72144c2cc691151e46e3d5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 24 Apr 2009 16:33:46 -0600 Subject: i965: rework GLSL/WM register allocation Use a bitvector of used/free flags. If we run out of temps, examine the live intervals of the temp regs in the program and free those which are no longer alive. Also, enable the new WM const buffer code. 
--- src/mesa/drivers/dri/i965/brw_wm.h | 5 +- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 211 +++++++++++++++++++++++++------- 2 files changed, 168 insertions(+), 48 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d0ab3bdc65..75205fddb7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -240,15 +240,18 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 22e17622c6..3471c1946e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -42,6 +44,76 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. 
*/ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + /*printf("Really out of temp regs!\n");*/ + return 60; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -68,11 +140,18 @@ static int get_scalar_dst_index(const struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c); + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +209,26 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = 
brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + if (grf < 0) { + /* totally out of temps */ + grf = 70; /* XXX !!!! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +238,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. 
+ */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +289,10 @@ static void prealloc_reg(struct brw_wm_compile *c) struct brw_reg reg; int nr_interp_regs = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +301,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? 
*/ - c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->use_const_buffer = GL_TRUE; + else + c->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->use_const_buffer);*/ if (c->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +336,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,7 +346,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } @@ -234,20 +354,24 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < FRAG_ATTRIB_MAX; i++) { if (inputs & (1<<i)) { nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); + reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; + reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); /* An instruction may reference up to three constants. 
* They'll be found in these registers. @@ -256,13 +380,9 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } -#if 0 - printf("USE CONST BUFFER? %d\n", c->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); -#endif } @@ -2595,7 +2715,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2603,6 +2722,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2833,17 +2954,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2867,6 +2984,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } -- cgit v1.2.3 From 777b9ff43e88e456d686208c83712f26aba2dd95 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Apr 2009 10:45:41 -0600 Subject: i965: only 
upload constant buffer data when we actually need the const buffer Make the use_const_buffer field per-program and only call the code which updates the constant buffer's data if the flag is set. This should undo the perf regression from 20f3497e4b6756e330f7b3f54e8acaa1d6c92052 (cherry picked from master, commit dc9705d12d162ba6d087eb762e315de9f97bc456) --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_curbe.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_vs.h | 2 -- src/mesa/drivers/dri/i965/brw_vs_emit.c | 11 ++++++----- src/mesa/drivers/dri/i965/brw_wm.h | 2 -- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 16 ++++++++++------ 6 files changed, 22 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index f0d4993e11..838e718d0d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -161,6 +161,7 @@ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; @@ -171,6 +172,7 @@ struct brw_fragment_program { GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index da746e4aa0..e6e26cdc40 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -368,7 +368,8 @@ update_vertex_constant_buffer(struct brw_context *brw) printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp); printf("program %u\n", vp->program.Base.Id); } - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); + if (vp->use_const_buffer) + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); } @@ -382,7 +383,8 @@ 
update_fragment_constant_buffer(struct brw_context *brw) printf("update WM constants in buffer %p\n", fp->const_buffer); printf("program %u\n", fp->program.Base.Id); } - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); + if (fp->use_const_buffer) + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index d20cf78b8a..1e4f66091e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,8 +75,6 @@ struct brw_vs_compile { struct brw_reg userplane[6]; - /** using a real constant buffer? */ - GLboolean use_const_buffer; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { GLint index; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index c2b3702798..b9a338b1cd 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -76,9 +76,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ if (c->vp->program.Base.Parameters->NumParameters + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) - c->use_const_buffer = GL_TRUE; + c->vp->use_const_buffer = GL_TRUE; else - c->use_const_buffer = GL_FALSE; + c->vp->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual @@ -100,7 +101,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { /* get constants from a real constant buffer */ c->prog_data.curb_read_length = 0; c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... 
*/ @@ -176,7 +177,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(reg, 0); @@ -873,7 +874,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } else if (relAddr) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 75205fddb7..2f80a60c12 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -257,8 +257,6 @@ struct brw_wm_compile { GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; - /** using a real constant buffer? */ - GLboolean use_const_buffer; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { GLint index; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 3471c1946e..eca4ca2c82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -311,12 +311,12 @@ static void prealloc_reg(struct brw_wm_compile *c) /* use a real constant buffer, or just use a section of the GRF? */ /* XXX this heuristic may need adjustment... */ if ((nr_params + nr_temps) * 4 + reg_index > 80) - c->use_const_buffer = GL_TRUE; + c->fp->use_const_buffer = GL_TRUE; else - c->use_const_buffer = GL_FALSE; + c->fp->use_const_buffer = GL_FALSE; /*printf("WM use_const_buffer = %d\n", c->use_const_buffer);*/ - if (c->use_const_buffer) { + if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from * it with a dataport read message. */ @@ -377,12 +377,16 @@ static void prealloc_reg(struct brw_wm_compile *c) * They'll be found in these registers. * XXX alloc these on demand! 
*/ - if (c->use_const_buffer) { + if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); +#endif } @@ -488,7 +492,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); - if (c->use_const_buffer && + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM)) { @@ -2730,7 +2734,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) #endif /* fetch any constants that this instruction needs */ - if (c->use_const_buffer) + if (c->fp->use_const_buffer) fetch_constants(c, inst); if (inst->CondUpdate) -- cgit v1.2.3 From dd4802176f7751e8c38c000687ff9cb9633649aa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Apr 2009 10:46:30 -0600 Subject: i965: #include prog_print.h to silence warning --- src/mesa/drivers/dri/i965/brw_curbe.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index e6e26cdc40..f6d2014fb1 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" -- cgit v1.2.3