From aa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 7 Oct 2010 15:01:07 +0100 Subject: llvmpipe: try to be sensible about whether to branch after mask updates Don't branch more than once in quick succession. Don't branch at the end of the shader. --- src/gallium/auxiliary/gallivm/lp_bld_flow.c | 6 +-- src/gallium/auxiliary/gallivm/lp_bld_flow.h | 3 ++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 11 +++- src/gallium/drivers/llvmpipe/lp_bld_alpha.c | 6 ++- src/gallium/drivers/llvmpipe/lp_bld_alpha.h | 3 +- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 14 ++++- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 3 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 69 ++++++++++++++++--------- 8 files changed, 80 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index cd5fbc2463..1ec33c742e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -450,7 +450,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) /** * Check if the mask predicate is zero. If so, jump to the end of the block. */ -static void +void lp_build_mask_check(struct lp_build_mask_context *mask) { LLVMBuilderRef builder = mask->flow->builder; @@ -490,8 +490,6 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, lp_build_flow_scope_begin(flow); lp_build_flow_scope_declare(flow, &mask->value); lp_build_flow_skip_begin(flow); - - lp_build_mask_check(mask); } @@ -505,8 +503,6 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index fffb493a93..095c781ec5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -94,6 +94,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value); +void +lp_build_mask_check(struct lp_build_mask_context *mask); + LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 441aebae29..03020a62f8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -959,8 +959,13 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + /* XXX: figure out if we are at the end of the shader and skip this: + */ + lp_build_mask_check(bld->mask); + } } @@ -987,6 +992,10 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, } lp_build_mask_update(bld->mask, mask); + + /* XXX: figure out if we are at the end of the shader and skip this: + */ + lp_build_mask_check(bld->mask); } static void diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f..e50643790c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c..27ca8aad4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 09b82fbe9b..6b8ffb6ca2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -462,7 +462,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef counter, + boolean do_branch) { struct lp_type type; struct lp_build_context bld; @@ -515,6 +516,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, lp_build_mask_update(mask, z_pass); + if (do_branch) + lp_build_mask_check(mask); + /* No need to worry about old stencil contents, just blend the * old and new values and shift into the correct position for * storage. @@ -701,6 +705,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { @@ -779,6 +788,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); + if (do_branch) + lp_build_mask_check(mask); + if (counter) lp_build_occlusion_count(builder, type, mask->value, counter); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d..2a63bb9378 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef counter, + boolean do_branch); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b7a51cd667..df5dd83c87 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -116,7 +116,8 @@ generate_depth_stencil(LLVMBuilderRef builder, LLVMValueRef src, LLVMValueRef dst_ptr, LLVMValueRef facing, - LLVMValueRef counter) + LLVMValueRef counter, + boolean do_branch) { const struct util_format_description *format_desc; @@ -136,7 +137,8 @@ generate_depth_stencil(LLVMBuilderRef builder, src, dst_ptr, facing, - counter); + counter, + do_branch); } @@ -253,6 +255,9 @@ generate_fs(struct llvmpipe_context *lp, struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.num_inputs < 3 && + shader->info.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; @@ -288,15 +293,6 @@ generate_fs(struct llvmpipe_context *lp, *pmask = lp_build_const_int_vec(type, ~0); } - /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - lp_build_interp_soa_update_pos(interp, i); - - /* Try to avoid the 1/w for quads where mask is zero. TODO: avoid - * this for depth-fail quads also. - */ - z = interp->pos[2]; early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && @@ -304,10 +300,22 @@ generate_fs(struct llvmpipe_context *lp, !shader->info.uses_kill && !shader->info.writes_z; + /* 'mask' will control execution based on quad's pixel alive/killed state */ + lp_build_mask_begin(&mask, flow, type, *pmask); + + if (!early_depth_stencil_test && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + if (early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + stencil_refs, + z, depth_ptr, + facing, counter, + !simple_shader); lp_build_interp_soa_update_inputs(interp, i); @@ -337,7 +345,7 @@ generate_fs(struct llvmpipe_context *lp, alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); + &mask, alpha, alpha_ref_value, FALSE); } LLVMBuildStore(builder, out, color[cbuf][chan]); @@ -356,7 +364,8 @@ generate_fs(struct llvmpipe_context *lp, if (!early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + stencil_refs, z, depth_ptr, + facing, counter, FALSE); lp_build_mask_end(&mask); @@ -386,7 +395,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; struct lp_build_flow_context *flow; @@ -401,9 +411,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ lp_build_mask_begin(&mask_ctx, flow, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -670,14 +680,23 @@ generate_fragment(struct llvmpipe_context *lp, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 -- cgit v1.2.3