From 4e7252510976d8d3ff12437ea8842129f24d88f5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 21 Oct 2010 15:07:45 -0700 Subject: i965: Correct scratch space allocation. One, it was allocating increments of 1kb, but per thread scratch space is a power of two. Two, the new FS wasn't getting total_scratch set at all, so everyone thought they had 1kb and writes beyond 1kb would go stomping on a neighbor thread. With this plus the previous register spilling for the new FS, glsl-fs-convolution-1 passes. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 1 - src/mesa/drivers/dri/i965/brw_wm.c | 21 +++++++++++++-------- src/mesa/drivers/dri/i965/brw_wm_state.c | 5 ++--- 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f38976aa8a..aa945c9771 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3252,7 +3252,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) } c->prog_data.total_grf = v.grf_used; - c->prog_data.total_scratch = 0; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index f2ce756564..7f3ba5f058 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -114,14 +114,6 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) /* how many general-purpose registers are used */ c->prog_data.total_grf = c->max_wm_grf; - /* Scratch space is used for register spilling */ - if (c->last_scratch) { - c->prog_data.total_scratch = c->last_scratch + 0x40; - } - else { - c->prog_data.total_scratch = 0; - } - /* Emit GEN4 code. */ brw_wm_emit(c); @@ -193,6 +185,19 @@ static void do_wm_prog( struct brw_context *brw, } } + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + /* Per-thread scratch space is power-of-two sized. */ + for (c->prog_data.total_scratch = 1024; + c->prog_data.total_scratch <= c->last_scratch; + c->prog_data.total_scratch *= 2) { + /* empty */ + } + } + else { + c->prog_data.total_scratch = 0; + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 411204e704..433ccc66f0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -97,7 +97,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); + key->total_scratch = brw->wm.prog_data->total_scratch; /* BRW_NEW_URB_FENCE */ key->urb_size = brw->urb.vsize; @@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = brw->wm.scratch_bo->offset >> 10; /* reloc */ - wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11; } else { wm.thread2.scratch_space_base_pointer = 0; wm.thread2.per_thread_scratch_space = 0; @@ -293,7 +293,6 @@ static void upload_wm_unit( struct brw_context *brw ) * bother reducing the allocation later, since we use scratch so * rarely. */ - assert(key.total_scratch <= 12 * 1024); if (key.total_scratch) { GLuint total = key.total_scratch * brw->wm_max_threads; -- cgit v1.2.3