summaryrefslogtreecommitdiff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2008-11-15 16:53:24 +0000
committerKeith Whitwell <keith@tungstengraphics.com>2008-11-15 16:53:24 +0000
commit7468765b18be202a64d58b83f6267b6973ea4897 (patch)
tree6e9fa6dd4a2ff79787bcae247377d07c00a036cf /src/mesa/drivers
parent5e1454a036be6da2c48e2e20bf6f8047ee1a94d3 (diff)
parent80d6379722a1249ce13db79a898d340644936f67 (diff)
Merge commit 'origin/master' into gallium-0.2
Conflicts: src/mesa/shader/prog_print.c
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_urb.c41
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c9
7 files changed, 79 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 740c7cbd10..9b0d7eab7b 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -88,7 +88,21 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.thread4.nr_urb_entries = key->nr_urb_entries;
clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
- clip.thread4.max_threads = 1; /* 2 threads */
+ /* If we have enough clip URB entries to run two threads, do so.
+ */
+ if (key->nr_urb_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(key->nr_urb_entries % 2 == 0);
+ clip.thread4.max_threads = 2 - 1;
+ } else {
+ assert(key->nr_urb_entries >= 5);
+ clip.thread4.max_threads = 1 - 1;
+ }
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ clip.thread4.max_threads = 0;
if (INTEL_DEBUG & DEBUG_STATS)
clip.thread4.stats_enable = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index d87b8f8a84..f893dd6742 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -105,6 +105,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
}
brw_validate_state(brw);
+ brw_upload_state(brw);
}
return hw_prim[prim];
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 4080c5e322..73d6dea01e 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -353,6 +353,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
intel_buffer_object(input->glarray->BufferObj);
/* Named buffer object: Just reference its contents directly. */
+ dri_bo_unreference(input->bo);
input->bo = intel_bufferobj_buffer(intel, intel_buffer,
INTEL_READ);
dri_bo_reference(input->bo);
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 4f925d1810..506126fcfb 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -172,7 +172,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
- sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1;
+ /* Each SF thread produces 1 PUE, and there can be up to 24 threads */
+ sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
sf.thread4.max_threads = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 1a004176de..7673dd36eb 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -42,7 +42,44 @@
#define SF 3
#define CS 4
-/* XXX: Are the min_entry_size numbers useful?
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
@@ -54,7 +91,7 @@ static const struct {
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
- { 6, 8, 1, 5 }, /* clp */
+ { 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 6e66f54524..942581696d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,12 +77,19 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
struct brw_vs_unit_state vs;
dri_bo *bo;
+ int chipset_max_threads;
memset(&vs, 0, sizeof(vs));
vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Choosing multiple program flow means that we may get 2-vertex threads,
+ * which will have the channel mask for dwords 4-7 enabled in the thread,
+ * and those dwords will be written to the second URB handle when we
+ * brw_urb_WRITE() results.
+ */
+ vs.thread1.single_program_flow = 0;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -91,8 +98,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread4.nr_urb_entries = key->nr_urb_entries;
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1),
- 15);
+
+ if (BRW_IS_G4X(brw))
+ chipset_max_threads = 32;
+ else
+ chipset_max_threads = 16;
+ vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+ 1, chipset_max_threads) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
vs.thread4.max_threads = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 61fe97a463..fd461618bc 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -67,8 +67,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
key->max_threads = 1;
- else
- key->max_threads = 32;
+ else {
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (BRW_IS_G4X(brw))
+ key->max_threads = 10 * 5;
+ else
+ key->max_threads = 8 * 4;
+ }
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;