summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2010-10-21 15:07:45 -0700
committerEric Anholt <eric@anholt.net>2010-10-21 15:21:28 -0700
commit4e7252510976d8d3ff12437ea8842129f24d88f5 (patch)
treec2e50a075fd7ede483e7265f46567cdbc5370d2b
parent0b77d57394a3712851ec271aa7ad353d56f302a1 (diff)
i965: Correct scratch space allocation.
One, it was allocating increments of 1kb, but per thread scratch space is a power of two. Two, the new FS wasn't getting total_scratch set at all, so everyone thought they had 1kb and writes beyond 1kb would go stomping on a neighbor thread. With this plus the previous register spilling for the new FS, glsl-fs-convolution-1 passes.
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c21
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c5
3 files changed, 15 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f38976aa8a..aa945c9771 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3252,7 +3252,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
}
c->prog_data.total_grf = v.grf_used;
- c->prog_data.total_scratch = 0;
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index f2ce756564..7f3ba5f058 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -114,14 +114,6 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
/* how many general-purpose registers are used */
c->prog_data.total_grf = c->max_wm_grf;
- /* Scratch space is used for register spilling */
- if (c->last_scratch) {
- c->prog_data.total_scratch = c->last_scratch + 0x40;
- }
- else {
- c->prog_data.total_scratch = 0;
- }
-
/* Emit GEN4 code.
*/
brw_wm_emit(c);
@@ -193,6 +185,19 @@ static void do_wm_prog( struct brw_context *brw,
}
}
+ /* Scratch space is used for register spilling */
+ if (c->last_scratch) {
+ /* Per-thread scratch space is power-of-two sized. */
+ for (c->prog_data.total_scratch = 1024;
+ c->prog_data.total_scratch <= c->last_scratch;
+ c->prog_data.total_scratch *= 2) {
+ /* empty */
+ }
+ }
+ else {
+ c->prog_data.total_scratch = 0;
+ }
+
if (INTEL_DEBUG & DEBUG_WM)
fprintf(stderr, "\n");
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 411204e704..433ccc66f0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -97,7 +97,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
- key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+ key->total_scratch = brw->wm.prog_data->total_scratch;
/* BRW_NEW_URB_FENCE */
key->urb_size = brw->urb.vsize;
@@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
brw->wm.scratch_bo->offset >> 10; /* reloc */
- wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+ wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11;
} else {
wm.thread2.scratch_space_base_pointer = 0;
wm.thread2.per_thread_scratch_space = 0;
@@ -293,7 +293,6 @@ static void upload_wm_unit( struct brw_context *brw )
* bother reducing the allocation later, since we use scratch so
* rarely.
*/
- assert(key.total_scratch <= 12 * 1024);
if (key.total_scratch) {
GLuint total = key.total_scratch * brw->wm_max_threads;