summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2008-11-02 18:50:59 -0800
committerEric Anholt <eric@anholt.net>2008-11-12 13:07:23 -0800
commit62ff7ab7206e1b9e195e81ad1a96b579819edb57 (patch)
treefcb4879783f4d732a307423058acd3e9fe613ff0
parent9dadfc09a9dd492f14eec8de060d3f5198c766ec (diff)
i965: Fix up VS max_threads for G4X and removing a magic number.
As far as I can read in the docs, VS threads can be 1:1 with the pairs of VUE handles allocated for them. Also, G4X can run twice as many threads as before (though we won't unless the we bump the preferred URB entries for VS).
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 6e66f54524..942581696d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,12 +77,19 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
struct brw_vs_unit_state vs;
dri_bo *bo;
+ int chipset_max_threads;
memset(&vs, 0, sizeof(vs));
vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Choosing multiple program flow means that we may get 2-vertex threads,
+ * which will have the channel mask for dwords 4-7 enabled in the thread,
+ * and those dwords will be written to the second URB handle when we
+ * brw_urb_WRITE() results.
+ */
+ vs.thread1.single_program_flow = 0;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -91,8 +98,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1),
- 15);
+
+ if (BRW_IS_G4X(brw))
+ chipset_max_threads = 32;
+ else
+ chipset_max_threads = 16;
+ vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+ 1, chipset_max_threads) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
vs.thread4.max_threads = 0;