From d5ef59d8b0ce2ea8f0ad983951e696d1679e3eb7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 8 Oct 2010 16:56:45 +0100 Subject: gallivm: Avoid control flow for two-sided stencil test. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 150 +++++++++++----------------- 1 file changed, 58 insertions(+), 92 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7561899a74..7eabe0508d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -128,57 +128,32 @@ lp_build_stencil_test_single(struct lp_build_context *bld, /** * Do the one or two-sided stencil test comparison. * \sa lp_build_stencil_test_single - * \param face an integer indicating front (+) or back (-) facing polygon. - * If NULL, assume front-facing. + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. */ static LLVMValueRef lp_build_stencil_test(struct lp_build_context *bld, const struct pipe_stencil_state stencil[2], LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, - LLVMValueRef face) + LLVMValueRef front_facing) { LLVMValueRef res; assert(stencil[0].enabled); - if (stencil[1].enabled && face) { - /* do two-sided test */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + if (stencil[1].enabled && front_facing) { + /* do back face test */ + LLVMValueRef back_res; - lp_build_flow_scope_declare(flow_ctx, &result); + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_test_single(bld, &stencil[1], - stencilRefs[1], stencilVals); - } - lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - - res = result; - } - else { - /* do single-side test */ - res = lp_build_stencil_test_single(bld, &stencil[0], - stencilRefs[0], stencilVals); + res = lp_build_select(bld, front_facing, res, back_res); } return res; @@ -195,14 +170,12 @@ lp_build_stencil_op_single(struct lp_build_context *bld, const struct pipe_stencil_state *stencil, enum stencil_op op, LLVMValueRef stencilRef, - LLVMValueRef stencilVals, - LLVMValueRef mask) + LLVMValueRef stencilVals) { - const unsigned stencilMax = 255; /* XXX fix */ struct lp_type type = bld->type; LLVMValueRef res; - LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + LLVMValueRef max = lp_build_const_int_vec(type, 0xff); unsigned stencil_op; assert(type.sign); @@ -255,19 +228,7 @@ lp_build_stencil_op_single(struct lp_build_context *bld, break; default: assert(0 && "bad stencil op mode"); - res = NULL; - } - - if (stencil->writemask != stencilMax) { - /* mask &= stencil->writemask */ - LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); - mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); - /* res = (res & mask) | (stencilVals & ~mask) */ - res = lp_build_select_bitwise(bld, writemask, res, stencilVals); - } - else { - /* res = mask ? res : stencilVals */ - res = lp_build_select(bld, mask, res, stencilVals); + res = bld->undef; } return res; @@ -284,49 +245,40 @@ lp_build_stencil_op(struct lp_build_context *bld, LLVMValueRef stencilRefs[2], LLVMValueRef stencilVals, LLVMValueRef mask, - LLVMValueRef face) + LLVMValueRef front_facing) { - assert(stencil[0].enabled); + LLVMValueRef res; - if (stencil[1].enabled && face) { - /* do two-sided op */ - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - LLVMValueRef front_facing; - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef result = bld->undef; + assert(stencil[0].enabled); - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); - lp_build_flow_scope_declare(flow_ctx, &result); + if (stencil[1].enabled && front_facing) { + /* do back face op */ + LLVMValueRef back_res; - /* front_facing = face > 0.0 */ - front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); - lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); - { - result = lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); - } - lp_build_else(&if_ctx); - { - result = lp_build_stencil_op_single(bld, &stencil[1], op, - stencilRefs[1], stencilVals, mask); - } - lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + res = lp_build_select(bld, front_facing, res, back_res); + } - return result; + if (stencil->writemask != 0xff) { + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); } else { - /* do single-sided op */ - return lp_build_stencil_op_single(bld, &stencil[0], op, - stencilRefs[0], stencilVals, mask); + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } + + return res; } @@ -519,6 +471,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = mask->value; + LLVMValueRef front_facing = NULL; /* Sanity checking */ { @@ -616,21 +569,34 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } } - if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + + /* front_facing = face > 0.0 ? ~0 : 0 */ + front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntType(bld.type.length*bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + bld.int_vec_type, ""); + } + /* convert scalar stencil refs into vectors */ stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); s_pass_mask = lp_build_stencil_test(&sbld, stencil, - stencil_refs, stencil_vals, face); + stencil_refs, stencil_vals, + front_facing); /* apply stencil-fail operator */ { LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, - s_fail_mask, face); + s_fail_mask, front_facing); } } @@ -676,13 +642,13 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, - z_fail_mask, face); + z_fail_mask, front_facing); /* apply Z-pass operator */ z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - z_pass_mask, face); + z_pass_mask, front_facing); } } else { @@ -692,7 +658,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, - s_pass_mask, face); + s_pass_mask, front_facing); } /* The Z bits are already in the right place but we may need to shift the -- cgit v1.2.3