From 87dd859b342b844add906358810445da21b6b092 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 6 Oct 2010 18:44:51 +0100 Subject: gallivm: Compute lod as integer whenever possible. More accurate/faster results for PIPE_TEX_MIPFILTER_NEAREST. Less FP <-> SI conversion overall. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 170 ++++++++++++++-------- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 12 +- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 40 ++--- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 31 ++-- 4 files changed, 158 insertions(+), 95 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index c1c98bf859..3287cf7c37 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -167,6 +167,73 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, } +/** + * Generate code to compute coordinate gradient (rho). + * \param ddx partial derivatives of (s, t, r, q) with respect to X + * \param ddy partial derivatives of (s, t, r, q) with respect to Y + * \param width scalar int texture width + * \param height scalar int texture height + * \param depth scalar int texture depth + * + * XXX: The resulting rho is scalar, so we ignore all but the first element of + * derivatives that are passed by the shader. + */ +static LLVMValueRef +lp_build_rho(struct lp_build_sample_context *bld, + const LLVMValueRef ddx[4], + const LLVMValueRef ddy[4], + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth) +{ + struct lp_build_context *float_bld = &bld->float_bld; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef dsdx, dsdy; + LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; + LLVMValueRef rho; + + dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdy = lp_build_abs(float_bld, drdy); + } + } + + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; + max = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); + rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildFMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } + } + + return rho; +} + + /** * Generate code to compute texture level of detail (lambda). * \param ddx partial derivatives of (s, t, r, q) with respect to X @@ -180,7 +247,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, * XXX: The resulting lod is scalar, so ignore all but the first element of * derivatives, lod_bias, etc that are passed by the shader. */ -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned unit, const LLVMValueRef ddx[4], @@ -189,9 +256,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, - LLVMValueRef depth) + LLVMValueRef depth, + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) { + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod; + + *out_lod_ipart = bld->int_bld.zero; + *out_lod_fpart = bld->float_bld.zero; + if (bld->static_state->min_max_lod_equal) { /* User is forcing sampling from a particular mipmap level. * This is hit during mipmap generation. @@ -199,68 +275,40 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef min_lod = bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit); - return min_lod; + lod = min_lod; } else { - struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef sampler_lod_bias = bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit); LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef lod; if (explicit_lod) { lod = LLVMBuildExtractElement(bld->builder, explicit_lod, index0, ""); } else { - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef dsdx, dsdy; - LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; LLVMValueRef rho; - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); - drdy = lp_build_abs(float_bld, drdy); - } - } + rho = lp_build_rho(bld, ddx, ddy, width, height, depth); - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); - rho = lp_build_max(float_bld, rho, max); - } + /* compute lod = log2(rho) */ + if ((mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) && + !lod_bias && + !bld->static_state->lod_bias_non_zero && + !bld->static_state->apply_max_lod && + !bld->static_state->apply_min_lod) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; } - /* compute lod = log2(rho) */ -#if 0 - lod = lp_build_log2(float_bld, rho); -#else - lod = lp_build_fast_log2(float_bld, rho); -#endif + if (0) { + lod = lp_build_log2(float_bld, rho); + } + else { + lod = lp_build_fast_log2(float_bld, rho); + } /* add shader lod bias */ if (lod_bias) { @@ -288,9 +336,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, lod = lp_build_max(float_bld, lod, min_lod); } + } - return lod; + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + LLVMValueRef ipart = lp_build_ifloor(float_bld, lod); + lp_build_name(ipart, "lod_ipart"); + *out_lod_ipart = ipart; + ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, ""); + *out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart"); } + else { + *out_lod_ipart = lp_build_iround(float_bld, lod); + } + + return; } @@ -304,10 +363,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, LLVMValueRef *level_out) { - struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; @@ -317,7 +375,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, bld->builder, unit); /* convert float lod to integer */ - level = lp_build_iround(float_bld, lod); + level = lod_ipart; /* clamp level to legal range of levels */ *level_out = lp_build_clamp(int_bld, level, zero, last_level); @@ -332,12 +390,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) + LLVMValueRef *level1_out) { - struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *int_bld = &bld->int_bld; LLVMValueRef last_level, level; @@ -345,7 +401,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, bld->builder, unit); /* convert float lod to integer */ - lp_build_ifloor_fract(float_bld, lod, &level, weight_out); + level = lod_ipart; /* compute level 0 and clamp to legal range of levels */ *level0_out = lp_build_clamp(int_bld, level, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index bb83ede931..b019c3fa5e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -274,7 +274,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); -LLVMValueRef +void lp_build_lod_selector(struct lp_build_sample_context *bld, unsigned unit, const LLVMValueRef ddx[4], @@ -283,7 +283,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, - LLVMValueRef depth); + LLVMValueRef depth, + unsigned mip_filter, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart); void lp_build_nearest_mip_level(struct lp_build_sample_context *bld, @@ -294,10 +297,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, unsigned unit, - LLVMValueRef lod, + LLVMValueRef lod_ipart, LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out); + LLVMValueRef *level1_out); LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 49a6eed615..8a55681166 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -882,13 +882,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef data_array, LLVMValueRef texel_out[4]) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; @@ -936,7 +936,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, ddy = face_ddy; } - /* * Compute the level of detail (float). */ @@ -945,9 +944,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, unit, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0); } /* @@ -966,30 +969,29 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); } break; case PIPE_TEX_MIPFILTER_NEAREST: - assert(lod); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: { LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMValueRef i255 = lp_build_const_int32(255); + LLVMTypeRef i32_type = LLVMIntType(32); LLVMTypeRef i16_type = LLVMIntType(16); - assert(lod); + assert(lod_fpart); + + lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1); - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); - lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); - lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); + lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, ""); lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); @@ -1049,9 +1051,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, lp_build_flow_scope_declare(flow_ctx, &packed_lo); lp_build_flow_scope_declare(flow_ctx, &packed_hi); - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(builder, LLVMRealUGE, - lod, float_bld->zero, ""); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); lp_build_if(&if_ctx, flow_ctx, builder, minify); { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index d464147371..4f9bf6763e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -884,12 +884,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef data_array, LLVMValueRef *colors_out) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; @@ -935,9 +935,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, unit, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0); } /* @@ -950,22 +954,21 @@ lp_build_sample_general(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); } } else { - assert(lod); + assert(lod_ipart); if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); + lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1); lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); } } @@ -1019,9 +1022,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld, lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, - lod, float_bld->zero, ""); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(bld->builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); { -- cgit v1.2.3