diff options
| author | Eric Anholt <eric@anholt.net> | 2008-01-02 15:47:47 -0800 | 
|---|---|---|
| committer | Eric Anholt <eric@anholt.net> | 2008-01-02 15:51:49 -0800 | 
| commit | 9e9f6f105c81436cecfe55a8a80d2efe45bc0d72 (patch) | |
| tree | 48da7615917c365292561b35b72f1644e5491bf8 /src | |
| parent | 03b59edbb53a3887779b0eb9d0f07b93ec747764 (diff) | |
[965] Convert WM unit to use a cache key instead of brw_cache_data.
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 |
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 203 |
2 files changed, 125 insertions, 79 deletions
| diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index fd18fcdc21..05111b351a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -590,7 +590,6 @@ struct brw_context        GLuint max_threads;        dri_bo *scratch_buffer; -      GLuint scratch_buffer_size;        GLuint sampler_count;        dri_bo *sampler_bo; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 8a7236e62f..02443c50d0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -41,77 +41,96 @@   * WM unit - fragment programs and rasterization   */ -static void upload_wm_unit(struct brw_context *brw ) +struct brw_wm_unit_key { +   unsigned int total_grf, total_scratch; +   unsigned int urb_entry_read_length; +   unsigned int curb_entry_read_length; +   unsigned int dispatch_grf_start_reg; + +   unsigned int curbe_offset; +   unsigned int urb_size; + +   unsigned int max_threads; + +   unsigned int nr_surfaces, sampler_count; +   GLboolean uses_depth, computes_depth, uses_kill, is_glsl; +   GLboolean polygon_stipple, stats_wm; +}; + +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)  { +   const struct gl_fragment_program *fp = brw->fragment_program;     struct intel_context *intel = &brw->intel; -   struct brw_wm_unit_state wm; -   GLuint max_threads; -   GLuint per_thread; -   dri_bo *reloc_bufs[3];     if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) -      max_threads = 0;  +      key->max_threads = 1;     else -      max_threads = 31; +      key->max_threads = 32; +   /* CACHE_NEW_WM_PROG */ +   key->total_grf = brw->wm.prog_data->total_grf; +   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; +   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; +   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); -   memset(&wm, 0, 
sizeof(wm)); +   /* BRW_NEW_URB_FENCE */ +   key->urb_size = brw->urb.vsize; -   /* CACHE_NEW_WM_PROG */ -   wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; -   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ -   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; -   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; -   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; +   /* CACHE_NEW_SURFACE */ +   key->nr_surfaces = brw->wm.nr_surfaces; -   wm.wm5.max_threads = max_threads;       +   /* CACHE_NEW_SAMPLER */ +   key->sampler_count = brw->wm.sampler_count; -   per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); -   assert(per_thread <= 12 * 1024); +   /* _NEW_POLYGONSTIPPLE */ +   key->polygon_stipple = brw->attribs.Polygon->StippleFlag; -   if (brw->wm.prog_data->total_scratch) { -      GLuint total = per_thread * (max_threads + 1); +   /* BRW_NEW_FRAGMENT_PROGRAM */ +   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; -      /* Scratch space -- just have to make sure there is sufficient -       * allocated for the active program and current number of threads. 
-       */ -      brw->wm.scratch_buffer_size = total; -      if (brw->wm.scratch_buffer && -	  brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { -	 dri_bo_unreference(brw->wm.scratch_buffer); -	 brw->wm.scratch_buffer = NULL; -      } -      if (!brw->wm.scratch_buffer) { -	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, -					       "wm scratch", -					       brw->wm.scratch_buffer_size, -					       4096, DRM_BO_FLAG_MEM_TT); -      } -   } +   /* as far as we can tell */ +   key->computes_depth = +      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; -   /* CACHE_NEW_SURFACE */ -   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; +   /* _NEW_COLOR */ +   key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled; +   key->is_glsl = brw_wm_is_glsl(fp); -   /* CACHE_NEW_WM_PROG */ -   if (per_thread != 0) { -   /* reloc */ +   /* XXX: This needs a flag to indicate when it changes. */ +   key->stats_wm = intel->stats_wm; +} + +static dri_bo * +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, +			dri_bo **reloc_bufs) +{ +   struct brw_wm_unit_state wm; + +   memset(&wm, 0, sizeof(wm)); + +   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; +   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ +   wm.thread1.depth_coef_urb_read_offset = 1; +   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; +   wm.thread1.binding_table_entry_count = key->nr_surfaces; + +   if (key->total_scratch != 0) {        wm.thread2.scratch_space_base_pointer = -	 brw->wm.scratch_buffer->offset >> 10; -      wm.thread2.per_thread_scratch_space = per_thread / 1024 - 1; +	 brw->wm.scratch_buffer->offset >> 10; /* reloc */ +      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;     } else {        wm.thread2.scratch_space_base_pointer = 0;        wm.thread2.per_thread_scratch_space = 0;     } -   /* BRW_NEW_CURBE_OFFSETS */ -   
wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; - +   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; +   wm.thread3.urb_entry_read_length = key->urb_entry_read_length; +   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; +   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;     wm.thread3.urb_entry_read_offset = 0; -   wm.thread1.depth_coef_urb_read_offset = 1; -   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; -   /* CACHE_NEW_SAMPLER */ -   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; +   wm.wm4.sampler_count = (key->sampler_count + 1) / 4;     if (brw->wm.sampler_bo != NULL) {        /* reloc */        wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; @@ -119,27 +138,16 @@ static void upload_wm_unit(struct brw_context *brw )        wm.wm4.sampler_state_pointer = 0;     } -   /* BRW_NEW_FRAGMENT_PROGRAM */ -   { -      const struct gl_fragment_program *fp = brw->fragment_program;  +   wm.wm5.program_uses_depth = key->uses_depth; +   wm.wm5.program_computes_depth = key->computes_depth; +   wm.wm5.program_uses_killpixel = key->uses_kill; -      if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))  -	 wm.wm5.program_uses_depth = 1; /* as far as we can tell */ -    -      if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))  -	 wm.wm5.program_computes_depth = 1; -    -      /* _NEW_COLOR */ -      if (fp->UsesKill ||  -	  brw->attribs.Color->AlphaEnabled)  -	 wm.wm5.program_uses_killpixel = 1;  -       -      if (brw_wm_is_glsl(fp)) -	  wm.wm5.enable_8_pix = 1; -      else -	  wm.wm5.enable_16_pix = 1; -   } +   if (key->is_glsl) +      wm.wm5.enable_8_pix = 1; +   else +      wm.wm5.enable_16_pix = 1; +   wm.wm5.max_threads = key->max_threads - 1;     wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */     wm.wm5.legacy_line_rast = 0;     wm.wm5.legacy_global_depth_bias = 0; @@ -147,9 +155,7 @@ static void upload_wm_unit(struct brw_context *brw )     
wm.wm5.line_aa_region_width = 0;     wm.wm5.line_endcap_aa_region_width = 1; -   /* _NEW_POLYGONSTIPPLE */ -   if (brw->attribs.Polygon->StippleFlag)  -      wm.wm5.polygon_stipple = 1; +   wm.wm5.polygon_stipple = key->polygon_stipple;     /* _NEW_POLYGON */     if (brw->attribs.Polygon->OffsetFill) { @@ -171,20 +177,61 @@ static void upload_wm_unit(struct brw_context *brw )        wm.wm5.line_stipple = 1;     } -   if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) +   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)        wm.wm4.stats_enable = 1; -   reloc_bufs[0] = brw->wm.prog_bo; -   reloc_bufs[1] = brw->wm.scratch_buffer; -   reloc_bufs[2] = brw->wm.sampler_bo; -     brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1;     brw->wm.thread2_delta = wm.thread2.per_thread_scratch_space;     brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2); +   return brw_upload_cache(&brw->cache, BRW_WM_UNIT, +			   key, sizeof(*key), +			   reloc_bufs, 3, +			   &wm, sizeof(wm), +			   NULL, NULL); +} + + +static void upload_wm_unit( struct brw_context *brw ) +{ +   struct intel_context *intel = &brw->intel; +   struct brw_wm_unit_key key; +   dri_bo *reloc_bufs[3]; + +   wm_unit_populate_key(brw, &key); + +   /* Allocate the necessary scratch space if we haven't already.  Don't +    * bother reducing the allocation later, since we use scratch so +    * rarely. 
+    */ +   assert(key.total_scratch <= 12 * 1024); +   if (key.total_scratch) { +      GLuint total = key.total_scratch * key.max_threads; + +      if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { +	 dri_bo_unreference(brw->wm.scratch_buffer); +	 brw->wm.scratch_buffer = NULL; +      } +      if (brw->wm.scratch_buffer == NULL) { +	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, +					       "wm scratch", +					       total, +					       4096, DRM_BO_FLAG_MEM_TT); +      } +   } + +   reloc_bufs[0] = brw->wm.prog_bo; +   reloc_bufs[1] = brw->wm.scratch_buffer; +   reloc_bufs[2] = brw->wm.sampler_bo; +     dri_bo_unreference(brw->wm.state_bo); -   brw->wm.state_bo = brw_cache_data( &brw->cache, BRW_WM_UNIT, &wm, -				      reloc_bufs, 3 ); +   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, +				       &key, sizeof(key), +				       reloc_bufs, 3, +				       NULL); +   if (brw->wm.state_bo == NULL) { +      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); +   }  }  static void emit_reloc_wm_unit(struct brw_context *brw) | 
