diff options
author | Eric Anholt <eric@anholt.net> | 2010-06-07 09:25:10 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-06-11 00:15:56 -0700 |
commit | 73de09f265cb1c66d70fd9eb92021882bfbbbef6 (patch) | |
tree | 999c40cc2b004ec8cc4670f21fbe72c485884821 /src/mesa/drivers/dri/i965/brw_misc_state.c | |
parent | 118a47623a11a374df371d52ed0294224e6a62dc (diff) |
i965: Convert the binding table to streamed indirect state.
This slightly reduces reduces cairo-gl firefox-talos-gfx runtime on my
Ironlake:
before:
[ # ] backend test min(s) median(s) stddev. count
[ 0] gl firefox-talos-gfx 38.236 38.383 0.43% 5/6
after:
[ 0] gl firefox-talos-gfx 37.799 38.203 0.39% 6/6
It turns out the cost of caching these objects and looking them up in
the cache again is greater than the cost of just computing the object
again, particularly when the overhead of having a separate BO to pin
is removed.
(Those that are paying close attention will note that this is a
reversal of the path I was moving the driver in a couple of years ago.
The major thing that has changed is that back then all state was
recomputed when we wrapped the streaming state buffer, including
recompiling our precious programs. Now, we're uncaching just the
objects that are cheap to compute, and retaining caching of expensive
objects)
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_misc_state.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 28 |
1 files changed, 12 insertions, 16 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 35908ee7b6..ab5d5240d0 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -96,12 +96,6 @@ const struct brw_tracked_state brw_drawing_rect = { .emit = upload_drawing_rect }; -static void prepare_binding_table_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->vs.bind_bo); - brw_add_validated_bo(brw, brw->wm.bind_bo); -} - /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. @@ -116,23 +110,24 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->vs.bind_bo_offset); /* vs */ else OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->wm.bind_bo_offset); /* wm/ps */ ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -154,21 +149,22 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->vs.bind_bo_offset); /* vs */ else OUT_BATCH(0); OUT_BATCH(0); /* gs */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->wm.bind_bo_offset); /* wm/ps */ ADVANCE_BATCH(); } const struct brw_tracked_state gen6_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = CACHE_NEW_SURF_BIND, + .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, + .cache = 0, }, - .prepare = prepare_binding_table_pointers, .emit = upload_gen6_binding_table_pointers, }; |