diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_state.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_state.c | 115 |
1 files changed, 88 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c225bf8f5c..942581696d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -34,62 +34,123 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" -static void upload_vs_unit( struct brw_context *brw ) -{ - struct brw_vs_unit_state vs; - - memset(&vs, 0, sizeof(vs)); +struct brw_vs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; - /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; - vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; - vs.thread3.dispatch_grf_start_reg = 1; + unsigned int curbe_offset; + unsigned int nr_urb_entries, urb_size; +}; - /* BRW_NEW_URB_FENCE */ - vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; - vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = MIN2( - MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), - 15); - +static void +vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + /* CACHE_NEW_VS_PROG */ + key->total_grf = brw->vs.prog_data->total_grf; + key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; + key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_vs_entries; + key->urb_size = brw->urb.vsize; /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (brw->attribs.Transform->ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence * clip_start: */ - vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + key->curbe_offset = brw->curbe.clip_start; } else { - vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + key->curbe_offset = brw->curbe.vs_start; } +} + +static dri_bo * +vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + struct brw_vs_unit_state vs; + dri_bo *bo; + int chipset_max_threads; + memset(&vs, 0, sizeof(vs)); + + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + /* Choosing multiple program flow means that we may get 2-vertex threads, + * which will have the channel mask for dwords 4-7 enabled in the thread, + * and those dwords will be written to the second URB handle when we + * brw_urb_WRITE() results. + */ + vs.thread1.single_program_flow = 0; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; + vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + vs.thread3.dispatch_grf_start_reg = 1; vs.thread3.urb_entry_read_offset = 0; + vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (BRW_IS_G4X(brw)) + chipset_max_threads = 32; + else + chipset_max_threads = 16; + vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, + 1, chipset_max_threads) - 1; + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; /* No samplers for ARB_vp programs: */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) - vs.thread4.stats_enable = 1; + vs.thread4.stats_enable = 1; - /* Vertex program always enabled: + /* Vertex program always enabled: */ vs.vs6.vs_enable = 1; - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); + bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL); + + /* Emit VS program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + return bo; } +static void prepare_vs_unit(struct brw_context *brw) +{ + struct brw_vs_unit_key key; + + vs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->vs.state_bo); + brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + &brw->vs.prog_bo, 1, + NULL); + if (brw->vs.state_bo == NULL) { + brw->vs.state_bo = vs_unit_create_from_key(brw, &key); + } +} const struct brw_tracked_state brw_vs_unit = { .dirty = { @@ -98,5 +159,5 @@ const struct brw_tracked_state brw_vs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_vs_unit + .prepare = prepare_vs_unit, }; |