diff options
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/drivers/dri/i915/i915_context.c | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 13 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_curbe.c | 22 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 4 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_cache.c | 153 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 46 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 15 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 5 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 207 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 67 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r200/r200_state.c | 3 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog.c | 3 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 7 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog.c | 3 | ||||
| -rw-r--r-- | src/mesa/shader/arbprogram.c | 13 | ||||
| -rw-r--r-- | src/mesa/shader/nvprogram.c | 2 | ||||
| -rw-r--r-- | src/mesa/shader/prog_optimize.c | 154 | ||||
| -rw-r--r-- | src/mesa/shader/prog_optimize.h | 12 | ||||
| -rw-r--r-- | src/mesa/shader/shader_api.c | 6 | 
19 files changed, 505 insertions, 232 deletions
| diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 1f9f363df9..367d2a3b64 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)           p->params_uptodate = 0;     } -   if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) +   if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))        i915_update_fog(ctx);  } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index aef2ff5f86..838e718d0d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -245,6 +245,9 @@ struct brw_vs_ouput_sizes {  }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 +  /** Number of texture sampler units */  #define BRW_MAX_TEX_UNIT 16 @@ -450,8 +453,6 @@ struct brw_context     struct {        struct brw_state_flags dirty; -      struct brw_tracked_state **atoms; -      GLuint nr_atoms;        GLuint nr_color_regions;        struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -471,7 +472,8 @@ struct brw_context        int validated_bo_count;     } state; -   struct brw_cache cache; +   struct brw_cache cache;  /** non-surface items */ +   struct brw_cache surface_cache;  /* surface items */     struct brw_cached_batch_item *cached_batch_items;     struct { @@ -555,11 +557,6 @@ struct brw_context        GLuint vs_size;        GLuint total_size; -      /* Dynamic tracker which changes to reflect the state referenced -       * by active fp and vp program parameters: -       */ -      struct brw_tracked_state tracked_state; -        dri_bo *curbe_bo;        /** Offset within curbe_bo of space for current curbe entry */        GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9197fede2d..05a685af3d 
100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@  #include "main/macros.h"  #include "main/enums.h"  #include "shader/prog_parameter.h" +#include "shader/prog_print.h"  #include "shader/prog_statevars.h"  #include "intel_batchbuffer.h"  #include "intel_regions.h" @@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)     GLfloat *buf;     GLuint i; -   /* Update our own dependency flags.  This works because this -    * function will also be called whenever fp or vp changes. -    */ -   brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); -   brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; -   brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; -     if (sz == 0) {        if (brw->curbe.last_buf) {  	 free(brw->curbe.last_buf); @@ -363,11 +357,7 @@ update_constant_buffer(struct brw_context *brw,        }        if (0) { -         int i; -         for (i = 0; i < params->NumParameters; i++) { -            float *p = params->ParameterValues[i]; -            printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); -         } +         _mesa_print_parameter_list(params);        }     }  } @@ -380,7 +370,7 @@ update_vertex_constant_buffer(struct brw_context *brw)     struct brw_vertex_program *vp =        (struct brw_vertex_program *) brw->vertex_program;     if (0) { -      printf("update VS constants in buffer %p\n", vp->const_buffer); +      printf("update VS constants in buffer %p  vp = %p\n", vp->const_buffer, vp);        printf("program %u\n", vp->program.Base.Id);     }     if (vp->use_const_buffer) @@ -394,6 +384,10 @@ update_fragment_constant_buffer(struct brw_context *brw)  {     struct brw_fragment_program *fp =        (struct brw_fragment_program *) brw->fragment_program; +   if (0) { +      printf("update WM constants in buffer %p\n", fp->const_buffer); +      printf("program %u\n", 
fp->program.Base.Id); +   }     if (fp->use_const_buffer)        update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);  } @@ -428,7 +422,7 @@ static void emit_constant_buffer(struct brw_context *brw)   */  const struct brw_tracked_state brw_constant_buffer = {     .dirty = { -      .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION),      /* plus fp and vp flags */ +      .mesa = _NEW_PROGRAM_CONSTANTS,        .brw  = (BRW_NEW_FRAGMENT_PROGRAM |  	       BRW_NEW_VERTEX_PROGRAM |  	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998..7ea2fc113c 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -135,8 +135,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,  			  void *aux_return);  void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw );  /***********************************************************************   * brw_state_batch.c diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b5166406..320d886c99 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@   * incorrect program is run for the other instance.   
*/ +#include "main/imports.h"  #include "brw_state.h"  #include "intel_batchbuffer.h" -#include "main/imports.h"  /* XXX: Fixme - have to include these to get the sizes of the prog_key   * structs: @@ -69,8 +69,10 @@  #include "brw_sf.h"  #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, -			dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, +         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)  {     GLuint *ikey = (GLuint *)key;     GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,     return hash;  } +  /**   * Marks a new buffer as being chosen for the given cache id.   */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,     cache->brw->state.dirty.cache |= 1 << cache_id;  } +  static struct brw_cache_item *  search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,  	     GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,  } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache)  {     struct brw_cache_item **items;     struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )     cache->size = size;  } +  /**   * Returns the buffer object matching cache_id and key, or NULL.   
*/ -dri_bo *brw_search_cache( struct brw_cache *cache, -			  enum brw_cache_id cache_id, -			  const void *key, -			  GLuint key_size, -			  dri_bo **reloc_bufs, GLuint nr_reloc_bufs, -			  void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, +                 enum brw_cache_id cache_id, +                 const void *key, +                 GLuint key_size, +                 dri_bo **reloc_bufs, GLuint nr_reloc_bufs, +                 void *aux_return)  {     struct brw_cache_item *item;     GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,     return item->bo;  } +  dri_bo *  brw_upload_cache( struct brw_cache *cache,  		  enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,     return bo;  } -/* This doesn't really work with aux data.  Use search/upload instead + +/** + * This doesn't really work with aux data.  Use search/upload instead   */  dri_bo *  brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,     return bo;  } +  /**   * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.   
* @@ -319,21 +330,22 @@ enum pool_type {     DW_GENERAL_STATE  }; +  static void -brw_init_cache_id( struct brw_context *brw, -		const char *name, -		enum brw_cache_id id, -		GLuint key_size, -		GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, +                  const char *name, +                  enum brw_cache_id id, +                  GLuint key_size, +                  GLuint aux_size)  { -   struct brw_cache *cache = &brw->cache; -     cache->name[id] = strdup(name);     cache->key_size[id] = key_size;     cache->aux_size[id] = aux_size;  } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw)  {     struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )     cache->size = 7;     cache->n_items = 0;     cache->items = (struct brw_cache_item **) -      _mesa_calloc(cache->size *  -		   sizeof(struct brw_cache_item)); +      _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "CC_VP",  		     BRW_CC_VP,  		     sizeof(struct brw_cc_viewport),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "CC_UNIT",  		     BRW_CC_UNIT,  		     sizeof(struct brw_cc_unit_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "WM_PROG",  		     BRW_WM_PROG,  		     sizeof(struct brw_wm_prog_key),  		     sizeof(struct brw_wm_prog_data)); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "SAMPLER_DEFAULT_COLOR",  		     BRW_SAMPLER_DEFAULT_COLOR,  		     sizeof(struct brw_sampler_default_color),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "SAMPLER",  		     BRW_SAMPLER,  		     0,		/* variable key/data size */  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "WM_UNIT",  		     BRW_WM_UNIT,  		     sizeof(struct brw_wm_unit_state),  		     0); -   
brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "SF_PROG",  		     BRW_SF_PROG,  		     sizeof(struct brw_sf_prog_key),  		     sizeof(struct brw_sf_prog_data)); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "SF_VP",  		     BRW_SF_VP,  		     sizeof(struct brw_sf_viewport),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "SF_UNIT",  		     BRW_SF_UNIT,  		     sizeof(struct brw_sf_unit_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "VS_UNIT",  		     BRW_VS_UNIT,  		     sizeof(struct brw_vs_unit_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "VS_PROG",  		     BRW_VS_PROG,  		     sizeof(struct brw_vs_prog_key),  		     sizeof(struct brw_vs_prog_data)); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "CLIP_UNIT",  		     BRW_CLIP_UNIT,  		     sizeof(struct brw_clip_unit_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "CLIP_PROG",  		     BRW_CLIP_PROG,  		     sizeof(struct brw_clip_prog_key),  		     sizeof(struct brw_clip_prog_data)); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "GS_UNIT",  		     BRW_GS_UNIT,  		     sizeof(struct brw_gs_unit_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     "GS_PROG",  		     BRW_GS_PROG,  		     sizeof(struct brw_gs_prog_key),  		     sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ +   struct brw_cache *cache = &brw->surface_cache; -   brw_init_cache_id(brw, +   cache->brw = brw; + +   cache->size = 7; +   cache->n_items = 0; +   cache->items = (struct brw_cache_item **) +      _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + +   brw_init_cache_id(cache,  		     "SS_SURFACE",  		     BRW_SS_SURFACE,  		     sizeof(struct brw_surface_state),  		     0); -   brw_init_cache_id(brw, +   brw_init_cache_id(cache,  		     
"SS_SURF_BIND",  		     BRW_SS_SURF_BIND,  		     0,  		     0);  } + +void +brw_init_caches(struct brw_context *brw) +{ +   brw_init_non_surface_cache(brw); +   brw_init_surface_cache(brw); +} + +  static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)  {     struct brw_cache_item *c, *next;     GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )     if (INTEL_DEBUG & DEBUG_STATE)        _mesa_printf("%s\n", __FUNCTION__); -   for (i = 0; i < brw->cache.size; i++) { -      for (c = brw->cache.items[i]; c; c = next) { +   for (i = 0; i < cache->size; i++) { +      for (c = cache->items[i]; c; c = next) {  	 int j;  	 next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )  	 free((void *)c->key);  	 free(c);        } -      brw->cache.items[i] = NULL; +      cache->items[i] = NULL;     } -   brw->cache.n_items = 0; +   cache->n_items = 0;     if (brw->curbe.last_buf) {        _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,40 @@ brw_clear_cache( struct brw_context *brw )     brw->state.dirty.cache |= ~0;  } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw)  {     /* un-tuned guess.  We've got around 20 state objects for a total of around      * 32k, so 1000 of them is around 1.5MB.      
*/     if (brw->cache.n_items > 1000) -      brw_clear_cache(brw); +      brw_clear_cache(brw, &brw->cache); + +   if (brw->surface_cache.n_items > 1000) +      brw_clear_cache(brw, &brw->surface_cache);  } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)  {     GLuint i; -   brw_clear_cache(brw); +   brw_clear_cache(brw, cache);     for (i = 0; i < BRW_MAX_CACHE; i++) { -      dri_bo_unreference(brw->cache.last_bo[i]); -      free(brw->cache.name[i]); +      dri_bo_unreference(cache->last_bo[i]); +      free(cache->name[i]);     } -   free(brw->cache.items); -   brw->cache.items = NULL; -   brw->cache.size = 0; +   free(cache->items); +   cache->items = NULL; +   cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ +   brw_destroy_cache(brw, &brw->cache); +   brw_destroy_cache(brw, &brw->surface_cache);  } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e61..2641bcb2aa 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,7 +59,6 @@ const struct brw_tracked_state *atoms[] =     &brw_curbe_offsets,     &brw_recalculate_urb_fence, -     &brw_cc_vp,     &brw_cc_unit, @@ -88,54 +87,26 @@ const struct brw_tracked_state *atoms[] =     &brw_line_stipple,     &brw_aa_line_parameters, -   /* Ordering of the commands below is documented as fixed.   
-    */ -#if 0 -   &brw_pipelined_state_pointers, -   &brw_urb_fence, -   &brw_constant_buffer_state, -#else +     &brw_psp_urb_cbs, -#endif     &brw_drawing_rect,     &brw_indices,     &brw_vertices, -   NULL,			/* brw_constant_buffer */ +   &brw_constant_buffer  };  void brw_init_state( struct brw_context *brw )  { -   GLuint i; - -   brw_init_cache(brw); - -   brw->state.atoms = _mesa_malloc(sizeof(atoms)); -   brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); -   _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - -   /* Patch in a pointer to the dynamic state atom: -    */ -   for (i = 0; i < brw->state.nr_atoms; i++) -      if (brw->state.atoms[i] == NULL) -	 brw->state.atoms[i] = &brw->curbe.tracked_state; - -   _mesa_memcpy(&brw->curbe.tracked_state,  -		&brw_constant_buffer, -		sizeof(brw_constant_buffer)); +   brw_init_caches(brw);  }  void brw_destroy_state( struct brw_context *brw )  { -   if (brw->state.atoms) { -      _mesa_free(brw->state.atoms); -      brw->state.atoms = NULL; -   } - -   brw_destroy_cache(brw); +   brw_destroy_caches(brw);     brw_destroy_batch_cache(brw);  } @@ -218,6 +189,7 @@ static struct dirty_bit_map mesa_bits[] = {     DEFINE_BIT(_NEW_MULTISAMPLE),     DEFINE_BIT(_NEW_TRACK_MATRIX),     DEFINE_BIT(_NEW_PROGRAM), +   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),     {0, 0, 0}  }; @@ -336,7 +308,7 @@ void brw_validate_state( struct brw_context *brw )     /* do prepare stage for all atoms */     for (i = 0; i < Elements(atoms); i++) { -      const struct brw_tracked_state *atom = brw->state.atoms[i]; +      const struct brw_tracked_state *atom = atoms[i];        if (brw->intel.Fallback)           break; @@ -367,8 +339,8 @@ void brw_upload_state(struct brw_context *brw)        _mesa_memset(&examined, 0, sizeof(examined));        prev = *state; -      for (i = 0; i < brw->state.nr_atoms; i++) {	  -	 const struct brw_tracked_state *atom = brw->state.atoms[i]; +      for (i = 0; i < Elements(atoms); i++) {	  +	 const struct 
brw_tracked_state *atom = atoms[i];  	 struct brw_state_flags generated;  	 assert(atom->dirty.mesa || @@ -397,7 +369,7 @@ void brw_upload_state(struct brw_context *brw)     }     else {        for (i = 0; i < Elements(atoms); i++) {	  -	 const struct brw_tracked_state *atom = brw->state.atoms[i]; +	 const struct brw_tracked_state *atom = atoms[i];  	 if (brw->intel.Fallback)  	    break; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b69616d6e5..3fdc48583b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )  {     GLuint i, reg = 0, mrf; -#if 0 -   if (c->vp->program.Base.Parameters->NumParameters >= 6) -      c->vp->use_const_buffer = 1; +   /* Determine whether to use a real constant buffer or use a block +    * of GRF registers for constants.  The later is faster but only +    * works if everything fits in the GRF. +    * XXX this heuristic/check may need some fine tuning... 
+    */ +   if (c->vp->program.Base.Parameters->NumParameters + +       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) +      c->vp->use_const_buffer = GL_TRUE;     else -#endif        c->vp->use_const_buffer = GL_FALSE; -   /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + +   /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/     /* r0 -- reserved as usual      */ diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f0d31fc1dd..2f80a60c12 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -240,15 +240,18 @@ struct brw_wm_compile {     GLuint max_wm_grf;     GLuint last_scratch; +   GLuint cur_inst;  /**< index of current instruction */ +     /** Mapping from Mesa registers to hardware registers */     struct {        GLboolean inited;        struct brw_reg reg;     } wm_regs[PROGRAM_PAYLOAD+1][256][4]; +   GLboolean used_grf[BRW_WM_MAX_GRF]; +   GLuint first_free_grf;     struct brw_reg stack;     struct brw_reg emit_mask_reg; -   GLuint reg_index;  /**< Index of next free GRF register */     GLuint tmp_regs[BRW_WM_MAX_GRF];     GLuint tmp_index;     GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 117460842a..39ea95f6fc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@  #include "main/macros.h"  #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h"  #include "brw_context.h"  #include "brw_eu.h"  #include "brw_wm.h" @@ -42,6 +44,76 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)  } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ +   c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. 
*/ +static void +release_grf(struct brw_wm_compile *c, int r) +{ +   /*assert(c->used_grf[r]);*/ +   c->used_grf[r] = GL_FALSE; +   c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ +   GLuint r; +   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { +      if (!c->used_grf[r]) { +         c->used_grf[r] = GL_TRUE; +         c->first_free_grf = r + 1;  /* a guess */ +         return r; +      } +   } + +   /* no free temps, try to reclaim some */ +   reclaim_temps(c); +   c->first_free_grf = 0; + +   /* try alloc again */ +   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { +      if (!c->used_grf[r]) { +         c->used_grf[r] = GL_TRUE; +         c->first_free_grf = r + 1;  /* a guess */ +         return r; +      } +   } + +   for (r = 0; r < BRW_WM_MAX_GRF; r++) { +      assert(c->used_grf[r]); +   } +   /*printf("Really out of temp regs!\n");*/ +   return 60; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ +   int r; +   for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) +      if (c->used_grf[r]) +         return r + 1; +   return 0; +} + + +  /**   * Record the mapping of a Mesa register to a hardware register.   
*/ @@ -68,11 +140,18 @@ static int get_scalar_dst_index(const struct prog_instruction *inst)  static struct brw_reg alloc_tmp(struct brw_wm_compile *c)  {      struct brw_reg reg; -    if(c->tmp_index == c->tmp_max) -	c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; -     + +    /* if we need to allocate another temp, grow the tmp_regs[] array */ +    if (c->tmp_index == c->tmp_max) { +       c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c); +    } + +    /* form the GRF register */      reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); +    /*printf("alloc_temp %d\n", reg.nr);*/ +    assert(reg.nr < BRW_WM_MAX_GRF);      return reg; +  }  /** @@ -130,35 +209,26 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,  	    return brw_null_reg();      } +    assert(index < 256);      /* see if we've already allocated a HW register for this Mesa register */      if (c->wm_regs[file][index][component].inited) { -	/* yes, re-use */ -	reg = c->wm_regs[file][index][component].reg; +       /* yes, re-use */ +       reg = c->wm_regs[file][index][component].reg;      }      else {  	/* no, allocate new register */ -	reg = brw_vec8_grf(c->reg_index, 0); -    } +       int grf = alloc_grf(c); +       if (grf < 0) { +          /* totally out of temps */ +          grf = 70; /* XXX !!!! */ +       } -    /* if this is a new register allocation, record it in the table */ -    if (!c->wm_regs[file][index][component].inited) { -	set_reg(c, file, index, component, reg); -	c->reg_index++; -    } +       reg = brw_vec8_grf(grf, 0); +       /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ -    if (c->reg_index >= BRW_WM_MAX_GRF - 12) { -	/* ran out of temporary registers! */ -#if 1 -        /* This is a big hack for now. -         * Return bad register index, just don't hang the GPU. 
-         */ -        _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); -        c->reg_index = BRW_WM_MAX_GRF - 13; -#else -	return brw_null_reg(); -#endif +       set_reg(c, file, index, component, reg);      } -  +      if (neg & (1 << component)) {  	reg = negate(reg);      } @@ -168,6 +238,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,  } + +/** + * This is called if we run out of GRF registers.  Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ +   GLint intBegin[MAX_PROGRAM_TEMPS]; +   GLint intEnd[MAX_PROGRAM_TEMPS]; +   int index; + +   /*printf("Reclaim temps:\n");*/ + +   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, +                             intBegin, intEnd); + +   for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { +      if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { +         /* program temp[i] can be freed */ +         int component; +         /*printf("  temp[%d] is dead\n", index);*/ +         for (component = 0; component < 4; component++) { +            if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { +               int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; +               release_grf(c, r); +               /* +               printf("  Reclaim temp %d, reg %d at inst %d\n", +                      index, r, c->cur_inst); +               */ +               c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; +            } +         } +      } +   } +} + + + +  /**   * Preallocate registers.  
This sets up the Mesa to hardware register   * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +289,10 @@ static void prealloc_reg(struct brw_wm_compile *c)      struct brw_reg reg;      int nr_interp_regs = 0;      GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; +    GLuint reg_index = 0; + +    memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); +    c->first_free_grf = 0;      for (i = 0; i < 4; i++) {          if (i < c->key.nr_depth_regs)  @@ -187,14 +301,20 @@ static void prealloc_reg(struct brw_wm_compile *c)              reg = brw_vec8_grf(0, 0);  	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);      } -    c->reg_index += 2 * c->key.nr_depth_regs; +    reg_index += 2 * c->key.nr_depth_regs;      /* constants */      { -        const int nr_params = c->fp->program.Base.Parameters->NumParameters; +        const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; +        const GLuint nr_temps = c->fp->program.Base.NumTemporaries;          /* use a real constant buffer, or just use a section of the GRF? */ -        c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ +        /* XXX this heuristic may need adjustment... 
*/ +        if ((nr_params + nr_temps) * 4 + reg_index > 80) +           c->fp->use_const_buffer = GL_TRUE; +        else +           c->fp->use_const_buffer = GL_FALSE; +        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/          if (c->fp->use_const_buffer) {             /* We'll use a real constant buffer and fetch constants from @@ -216,7 +336,7 @@ static void prealloc_reg(struct brw_wm_compile *c)             for (i = 0; i < nr_params; i++) {                /* loop over XYZW channels */                for (j = 0; j < 4; j++, index++) { -                 reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); +                 reg = brw_vec1_grf(reg_index + index / 8, index % 8);                   /* Save pointer to parameter/constant value.                    * Constants will be copied in prepare_constant_buffer()                    */ @@ -226,7 +346,7 @@ static void prealloc_reg(struct brw_wm_compile *c)             }             /* number of constant regs used (each reg is float[8]) */             c->nr_creg = 2 * ((4 * nr_params + 15) / 16); -           c->reg_index += c->nr_creg; +           reg_index += c->nr_creg;          }      } @@ -234,20 +354,24 @@ static void prealloc_reg(struct brw_wm_compile *c)      for (i = 0; i < FRAG_ATTRIB_MAX; i++) {  	if (inputs & (1<<i)) {  	    nr_interp_regs++; -	    reg = brw_vec8_grf(c->reg_index, 0); +	    reg = brw_vec8_grf(reg_index, 0);  	    for (j = 0; j < 4; j++)  		set_reg(c, PROGRAM_PAYLOAD, i, j, reg); -	    c->reg_index += 2; +	    reg_index += 2;  	}      }      c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;      c->prog_data.urb_read_length = nr_interp_regs * 2;      c->prog_data.curb_read_length = c->nr_creg; -    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); -    c->reg_index++; -    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); -    c->reg_index += 2; +    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 
reg_index, 0); +    reg_index++; +    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); +    reg_index += 2; + +    /* mark GRF regs [0..reg_index-1] as in-use */ +    for (i = 0; i < reg_index; i++) +       prealloc_grf(c, i);      /* An instruction may reference up to three constants.       * They'll be found in these registers. @@ -256,7 +380,7 @@ static void prealloc_reg(struct brw_wm_compile *c)      if (c->fp->use_const_buffer) {         for (i = 0; i < 3; i++) {            c->current_const[i].index = -1; -          c->current_const[i].reg = alloc_tmp(c); +          c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);         }      }  #if 0 @@ -2595,7 +2719,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)      struct brw_compile *p = &c->func;      struct brw_indirect stack_index = brw_indirect(0, 0); -    c->reg_index = 0;      prealloc_reg(c);      brw_set_compression_control(p, BRW_COMPRESSION_NONE);      brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2603,6 +2726,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)      for (i = 0; i < c->nr_fp_insns; i++) {          const struct prog_instruction *inst = &c->prog_instructions[i]; +        c->cur_inst = i; +  #if 0          _mesa_printf("Inst %d: ", i);          _mesa_print_instruction(inst); @@ -2833,17 +2958,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)  		_mesa_printf("unsupported IR in fragment shader %d\n",  			inst->Opcode);  	} +  	if (inst->CondUpdate)  	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);  	else  	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);      }      post_wm_emit(c); - -    if (c->reg_index >= BRW_WM_MAX_GRF) { -        _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); -        /* XXX we need to do some proper error recovery here */ -    }  } @@ -2867,6 +2988,6 @@ void brw_wm_glsl_emit(struct 
brw_context *brw, struct brw_wm_compile *c)          brw_wm_print_program(c, "brw_wm_glsl_emit done");      } -    c->prog_data.total_grf = c->reg_index; +    c->prog_data.total_grf = num_grf_used(c);      c->prog_data.total_scratch = 0;  } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 805df8a4af..f646ee7fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -268,7 +268,7 @@ brw_create_texture_surface( struct brw_context *brw,        surf.ss0.cube_neg_z = 1;     } -   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, +   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,  			 key, sizeof(*key),  			 &key->bo, key->bo ? 1 : 0,  			 &surf, sizeof(surf), @@ -321,10 +321,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )     key.tiling = intelObj->mt->region->tiling;     dri_bo_unreference(brw->wm.surf_bo[surf]); -   brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, -                                         &key, sizeof(key), -                                         &key.bo, key.bo ? 1 : 0, -                                         NULL); +   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, +                                            BRW_SS_SURFACE, +                                            &key, sizeof(key), +                                            &key.bo, key.bo ? 1 : 0, +                                            NULL);     if (brw->wm.surf_bo[surf] == NULL) {        brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);     } @@ -362,7 +363,7 @@ brw_create_constant_surface( struct brw_context *brw,     surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? 
*/     brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ -   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, +   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,  			 key, sizeof(*key),  			 &key->bo, key->bo ? 1 : 0,  			 &surf, sizeof(surf), @@ -427,7 +428,8 @@ brw_update_wm_constant_surface( GLcontext *ctx,     */     dri_bo_unreference(brw->wm.surf_bo[surf]); -   brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, +   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, +                                            BRW_SS_SURFACE,                                              &key, sizeof(key),                                              &key.bo, key.bo ? 1 : 0,                                              NULL); @@ -456,17 +458,14 @@ brw_update_vs_constant_surface( GLcontext *ctx,     assert(surf == 0); -   /* free old const buffer if too small */ -   if (const_buffer && const_buffer->size < size) { -      dri_bo_unreference(const_buffer); -      const_buffer = NULL; -   } +   /* We always create a new VS constant buffer so that several can be +    * in flight at a time.  Free the old one first... +    */ +   dri_bo_unreference(const_buffer); -   /* alloc new buffer if needed */ -   if (!const_buffer) { -      const_buffer = -         drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); -   } +   /* alloc new buffer */ +   const_buffer = +      drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);     memset(&key, 0, sizeof(key)); @@ -487,7 +486,8 @@ brw_update_vs_constant_surface( GLcontext *ctx,     */     dri_bo_unreference(brw->vs.surf_bo[surf]); -   brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, +   brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, +                                            BRW_SS_SURFACE,                                              &key, sizeof(key),                                              &key.bo, key.bo ? 
1 : 0,                                              NULL); @@ -569,10 +569,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,     dri_bo_unreference(brw->wm.surf_bo[unit]);     brw->wm.surf_bo[unit] = NULL;     if (cached)  -       brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, -	       &key, sizeof(key), -	       &region_bo, 1, -	       NULL); +       brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, +                                                BRW_SS_SURFACE, +                                                &key, sizeof(key), +                                                &region_bo, 1, +                                                NULL);     if (brw->wm.surf_bo[unit] == NULL) {        struct brw_surface_state surf; @@ -598,7 +599,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,        surf.ss0.writedisable_alpha = !key.color_mask[3];        /* Key size will never match key size for textures, so we're safe. */ -      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, +      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, +                                               BRW_SS_SURFACE,                                                 &key, sizeof(key),  					       &region_bo, 1,  					       &surf, sizeof(surf), @@ -630,7 +632,7 @@ brw_wm_get_binding_table(struct brw_context *brw)     assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); -   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, +   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,  			      NULL, 0,  			      brw->wm.surf_bo, brw->wm.nr_surfaces,  			      NULL); @@ -646,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw)           else              data[i] = 0; -      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, +      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,  				  NULL, 0,  				  brw->wm.surf_bo, brw->wm.nr_surfaces,  				  data, data_size, 
@@ -746,7 +748,7 @@ brw_vs_get_binding_table(struct brw_context *brw)     assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); -   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, +   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,  			      NULL, 0,  			      brw->vs.surf_bo, brw->vs.nr_surfaces,  			      NULL); @@ -762,7 +764,7 @@ brw_vs_get_binding_table(struct brw_context *brw)           else              data[i] = 0; -      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, +      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,  				  NULL, 0,  				  brw->vs.surf_bo, brw->vs.nr_surfaces,  				  data, data_size, @@ -787,8 +789,7 @@ brw_vs_get_binding_table(struct brw_context *brw)  /** - * Vertex shader surfaces.  Just constant buffer for now.  Could add vertex  - * shader textures in the future. + * Vertex shader surfaces (constant buffer).   */  static void prepare_vs_surfaces(struct brw_context *brw )  { @@ -824,8 +825,12 @@ prepare_surfaces(struct brw_context *brw)  const struct brw_tracked_state brw_wm_surfaces = {     .dirty = { -      .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, -      .brw = BRW_NEW_CONTEXT, +      .mesa = (_NEW_COLOR | +               _NEW_TEXTURE | +               _NEW_BUFFERS | +               _NEW_PROGRAM | +               _NEW_PROGRAM_CONSTANTS), +      .brw = (BRW_NEW_CONTEXT),        .cache = 0     },     .prepare = prepare_surfaces, diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 2fcc87c0f5..28ba5f49bc 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2484,7 +2484,7 @@ void r200ValidateState( GLcontext *ctx )       r200UpdateDrawBuffer(ctx);     } -   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { +   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {        r200UpdateTextureState( ctx );        new_state |= rmesa->NewGLState; /* may 
add TEXTURE_MATRIX */        r200UpdateLocalViewer( ctx ); @@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx )     }     if (new_state & (_NEW_PROGRAM| +                    _NEW_PROGRAM_CONSTANTS |     /* need to test for pretty much anything due to possible parameter bindings */  	_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|  	_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 873cde4414..2f45429cf2 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300,  			fp->translated = GL_TRUE;  		if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))  			r300FragmentProgramDump(fp, &fp->code); -		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); +		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | +                                          _NEW_PROGRAM_CONSTANTS);  	}  	update_params(r300, fp); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 79f0b3625c..b96ba4ed94 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1109,7 +1109,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)  	struct gl_program_parameter_list *paramList;  	GLuint i; -	if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) +	if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))  		return;  	fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; @@ -2357,11 +2357,12 @@ void r300UpdateShaders(r300ContextPtr rmesa)  			hw_tcl_on = future_hw_tcl_on = 0;  			r300ResetHwState(rmesa); -			r300UpdateStateParameters(ctx, _NEW_PROGRAM); +			r300UpdateStateParameters(ctx, _NEW_PROGRAM | +                                                  _NEW_PROGRAM_CONSTANTS);  			return;  		}  	} -	r300UpdateStateParameters(ctx, 
_NEW_PROGRAM); +	r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);  }  static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 292573de89..300559d0b4 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300,  		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); -		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); +		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | +                                          _NEW_PROGRAM_CONSTANTS);  		if (RADEON_DEBUG & DEBUG_PIXEL) {  			if (fp->translated) { diff --git a/src/mesa/shader/arbprogram.c b/src/mesa/shader/arbprogram.c index 981565ab8f..317d623a22 100644 --- a/src/mesa/shader/arbprogram.c +++ b/src/mesa/shader/arbprogram.c @@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id)     GET_CURRENT_CONTEXT(ctx);     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM); -     /* Error-check target and get curProg */     if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */          (ctx->Extensions.NV_vertex_program || @@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id)        return;     } +   /* signal new program (and its new constants) */ +   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +     /* bind newProg */     if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */        _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, @@ -489,7 +490,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,     GET_CURRENT_CONTEXT(ctx);     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     if (target == GL_FRAGMENT_PROGRAM_ARB         && 
ctx->Extensions.ARB_fragment_program) { @@ -537,7 +538,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count,     GLfloat * dest;     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     if (count <= 0) {        _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)"); @@ -631,7 +632,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index,     struct gl_program *prog;     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     if ((target == GL_FRAGMENT_PROGRAM_NV          && ctx->Extensions.NV_fragment_program) || @@ -685,7 +686,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count,     GLint i;     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     if (count <= 0) {        _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)"); diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c index 5142c2a4a5..8ba521182b 100644 --- a/src/mesa/shader/nvprogram.c +++ b/src/mesa/shader/nvprogram.c @@ -706,7 +706,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,     GET_CURRENT_CONTEXT(ctx);     ASSERT_OUTSIDE_BEGIN_END(ctx); -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     prog = _mesa_lookup_program(ctx, id);     if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) { diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index 6ba2e76ff9..a02f5efa41 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)  /** - * Find the live intervals for 
each temporary register in the program. - * For register R, the interval [A,B] indicates that R is referenced - * from instruction A through instruction B. - * Special consideration is needed for loops and subroutines. - * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + * Find first/last instruction that references each temporary register.   */ -static GLboolean -find_live_intervals(struct gl_program *prog, -                    struct interval_list *liveIntervals) +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, +                          GLuint numInstructions, +                          GLint intBegin[MAX_PROGRAM_TEMPS], +                          GLint intEnd[MAX_PROGRAM_TEMPS])  {     struct loop_info     { @@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog,     };     struct loop_info loopStack[MAX_LOOP_NESTING];     GLuint loopStackDepth = 0; -   GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];     GLuint i; -   /* -    * Note: we'll return GL_FALSE below if we find relative indexing -    * into the TEMP register file.  We can't handle that yet. -    * We also give up on subroutines for now. 
-    */ - -   if (dbg) { -      _mesa_printf("Optimize: Begin find intervals\n"); -   } -     for (i = 0; i < MAX_PROGRAM_TEMPS; i++){        intBegin[i] = intEnd[i] = -1;     }     /* Scan instructions looking for temporary registers */ -   for (i = 0; i < prog->NumInstructions; i++) { -      const struct prog_instruction *inst = prog->Instructions + i; +   for (i = 0; i < numInstructions; i++) { +      const struct prog_instruction *inst = instructions + i;        if (inst->Opcode == OPCODE_BGNLOOP) {           loopStack[loopStackDepth].Start = i;           loopStack[loopStackDepth].End = inst->BranchTarget; @@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog,           return GL_FALSE;        }        else { -         const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); +         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/           GLuint j;           for (j = 0; j < numSrc; j++) {              if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { @@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog,        }     } +   return GL_TRUE; +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, +                    struct interval_list *liveIntervals) +{ +   GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; +   GLuint i; + +   /* +    * Note: we'll return GL_FALSE below if we find relative indexing +    * into the TEMP register file.  We can't handle that yet. +    * We also give up on subroutines for now. 
+    */ + +   if (dbg) { +      _mesa_printf("Optimize: Begin find intervals\n"); +   } + +   /* build intermediate arrays */ +   if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, +                                  intBegin, intEnd)) +      return GL_FALSE; +     /* Build live intervals list from intermediate arrays */     liveIntervals->Num = 0;     for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { @@ -794,6 +814,96 @@ _mesa_reallocate_registers(struct gl_program *prog) + + + + +#if 0 +static void +_mesa_find_temporary_live_intervals(struct gl_program *prog, +                                    GLint firstInst[MAX_PROGRAM_TEMPS], +                                    GLint lastInst[MAX_PROGRAM_TEMPS]) +{ +   GLuint i; + +   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { +      firstInst[i] = lastInst[i] = -1; +   } + +   struct loop_info loopStack[MAX_LOOP_NESTING]; +   GLuint loopStackDepth = 0; +   GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; +   GLuint i; + +   /* +    * Note: we'll return GL_FALSE below if we find relative indexing +    * into the TEMP register file.  We can't handle that yet. +    * We also give up on subroutines for now. 
+    */ + +   if (dbg) { +      _mesa_printf("Optimize: Begin find intervals\n"); +   } + +   for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ +      intBegin[i] = intEnd[i] = -1; +   } + +   /* Scan instructions looking for temporary registers */ +   for (i = 0; i < prog->NumInstructions; i++) { +      const struct prog_instruction *inst = prog->Instructions + i; +      if (inst->Opcode == OPCODE_BGNLOOP) { +         loopStack[loopStackDepth].Start = i; +         loopStack[loopStackDepth].End = inst->BranchTarget; +         loopStackDepth++; +      } +      else if (inst->Opcode == OPCODE_ENDLOOP) { +         loopStackDepth--; +      } +      else if (inst->Opcode == OPCODE_CAL) { +         return GL_FALSE; +      } +      else { +         const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); +         GLuint j; +         for (j = 0; j < numSrc; j++) { +            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { +               const GLuint index = inst->SrcReg[j].Index; +               if (inst->SrcReg[j].RelAddr) +                  return GL_FALSE; +               update_interval(intBegin, intEnd, index, i); +               if (loopStackDepth > 0) { +                  /* extend temp register's interval to end of loop */ +                  GLuint loopEnd = loopStack[loopStackDepth - 1].End; +                  update_interval(intBegin, intEnd, index, loopEnd); +               } +            } +         } +         if (inst->DstReg.File == PROGRAM_TEMPORARY) { +            const GLuint index = inst->DstReg.Index; +            if (inst->DstReg.RelAddr) +               return GL_FALSE; +            update_interval(intBegin, intEnd, index, i); +            if (loopStackDepth > 0) { +               /* extend temp register's interval to end of loop */ +               GLuint loopEnd = loopStack[loopStackDepth - 1].End; +               update_interval(intBegin, intEnd, index, loopEnd); +            } +         } +      } +   } + + + + +#endif + + + + + + +  /**   * Apply 
optimizations to the given program to eliminate unnecessary   * instructions, temp regs, etc. diff --git a/src/mesa/shader/prog_optimize.h b/src/mesa/shader/prog_optimize.h index d102cfd9fc..43894a2723 100644 --- a/src/mesa/shader/prog_optimize.h +++ b/src/mesa/shader/prog_optimize.h @@ -25,7 +25,19 @@  #ifndef PROG_OPT_H  #define PROG_OPT_H + +#include "main/config.h" + +  struct gl_program; +struct prog_instruction; + + +extern GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, +                          GLuint numInstructions, +                          GLint intBegin[MAX_PROGRAM_TEMPS], +                          GLint intEnd[MAX_PROGRAM_TEMPS]);  extern void  _mesa_optimize_program(GLcontext *ctx, struct gl_program *program); diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 644cd39185..8f414a0889 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program)        return;     } -   FLUSH_VERTICES(ctx, _NEW_PROGRAM); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);     if (program) {        shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram"); @@ -1789,7 +1789,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,        return;     } -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     uniform = &shProg->Uniforms->Uniforms[location]; @@ -1929,7 +1929,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,        return;     } -   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); +   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);     uniform = &shProg->Uniforms->Uniforms[location]; | 
