diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 356 |
1 files changed, 157 insertions, 199 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f372a48846..b55c863aa0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -54,6 +54,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" +#include "lp_bld_init.h" /* for lp_build_engine */ #include "lp_bld_logic.h" #include "lp_bld_pack.h" #include "lp_bld_debug.h" @@ -239,7 +240,7 @@ lp_build_sum_vector(struct lp_build_context *bld, { const struct lp_type type = bld->type; LLVMValueRef index, res; - int i; + unsigned i; if (a == bld->zero) return bld->zero; @@ -675,26 +676,13 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - if (type.length == 1) { - LLVMTypeRef int_type = LLVMIntType(type.width); - LLVMTypeRef float_type = LLVMFloatType(); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0); - a = LLVMBuildBitCast(bld->builder, a, int_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, float_type, ""); - return a; - } - else { - /* vector of floats */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; - } + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { @@ -742,17 +730,9 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef one; unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); - if (type.length == 1) { - int_type = lp_build_int_elem_type(type); - vec_type = lp_build_elem_type(type); - mask = LLVMConstInt(int_type, maskBit, 0); - } - else { - /* vector */ - int_type = lp_build_int_vec_type(type); - vec_type = lp_build_vec_type(type); - mask = lp_build_const_int_vec(type, maskBit); - } + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_const_int_vec(type, maskBit); /* Take the sign bit and add it to 1 constant */ sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); @@ -819,21 +799,11 @@ lp_build_int_to_float(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); assert(type.floating); - /*assert(lp_check_value(type, a));*/ - if (type.length == 1) { - LLVMTypeRef float_type = LLVMFloatType(); - return LLVMBuildSIToFP(bld->builder, a, float_type, ""); - } - else { - LLVMTypeRef vec_type = lp_build_vec_type(type); - /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ - LLVMValueRef res; - res = LLVMBuildSIToFP(bld->builder, a, vec_type, ""); - return res; - } + return LLVMBuildSIToFP(bld->builder, a, vec_type, ""); } @@ -887,7 +857,7 @@ lp_build_trunc(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -909,7 +879,7 @@ lp_build_round(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -928,15 +898,9 @@ lp_build_floor(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); - if (type.length == 1) { - LLVMValueRef res; - res = lp_build_ifloor(bld, a); - res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); - return res; - } - - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -957,7 +921,7 @@ lp_build_ceil(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); else { LLVMTypeRef vec_type = lp_build_vec_type(type); @@ -991,18 +955,12 @@ lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); + assert(lp_check_value(type, a)); - if (type.length == 1) { - LLVMTypeRef int_type = LLVMIntType(type.width); - return LLVMBuildFPToSI(bld->builder, a, int_type, ""); - } - else { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); - } + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); } @@ -1019,17 +977,9 @@ lp_build_iround(struct lp_build_context *bld, assert(type.floating); - if (type.length == 1) { - /* scalar float to int */ - LLVMTypeRef int_type = LLVMIntType(type.width); - /* XXX we want rounding here! */ - res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); - return res; - } - assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); } else { @@ -1069,17 +1019,9 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); - - if (type.length == 1) { - /* scalar float to int */ - LLVMTypeRef int_type = LLVMIntType(type.width); - res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); - return res; - } - assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); } else { @@ -1127,10 +1069,11 @@ lp_build_iceil(struct lp_build_context *bld, assert(type.floating); assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) { res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); } else { + /* TODO: mimic lp_build_ifloor() here */ assert(0); res = bld->undef; } @@ -1228,6 +1171,92 @@ lp_build_rsqrt(struct lp_build_context *bld, } +#ifdef PIPE_OS_WINDOWS + +/* + * XXX: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. + * + * XXX: With LLVM 2.7 both schemes cause an assertion failure. + */ +static LLVMValueRef +lp_build_sincos(struct lp_build_context *bld, + const char *name, + float (*func)(float), + LLVMValueRef a) +{ + LLVMModuleRef module = + LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld->builder))); + LLVMValueRef function; + LLVMValueRef res; + unsigned i; + + assert(bld->type.floating); + assert(bld->type.width == 32); + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[1]; + LLVMTypeRef function_type; + + ret_type = LLVMFloatType(); + arg_types[0] = LLVMFloatType(); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMPrivateLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, func); + } + + res = bld->undef; + + for (i = 0; i < bld->type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef args[1]; + LLVMValueRef tmp; + + args[0] = LLVMBuildExtractElement(bld->builder, a, index, ""); + + tmp = LLVMBuildCall(bld->builder, function, args, Elements(args), ""); + + res = LLVMBuildInsertElement(bld->builder, res, tmp, index, ""); + } + + return res; +} + +static float c_cosf( float f ) +{ + return (float) cos( (double) f ); +} + +static float c_sinf( float f ) +{ + return (float) sin( (double) f ); +} + +LLVMValueRef +lp_build_cos(struct lp_build_context *bld, + LLVMValueRef a) +{ + return lp_build_sincos(bld, "cosf", &c_cosf, a); +} + +LLVMValueRef +lp_build_sin(struct lp_build_context *bld, + LLVMValueRef a) +{ + return lp_build_sincos(bld, "sinf", &c_sinf, a); +} + +#else /* !PIPE_OS_WINDOWS */ + /** * Generate cos(a) */ @@ -1235,14 +1264,6 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { -#ifdef PIPE_OS_WINDOWS - /* - * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() - * which is neither efficient nor does the CRT linkage work on Windows - * causing segmentation fault. So simply disable the code for now. - */ - return bld->one; -#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1253,7 +1274,6 @@ lp_build_cos(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); -#endif } @@ -1264,14 +1284,6 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { -#ifdef PIPE_OS_WINDOWS - /* - * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf() - * which is neither efficient nor does the CRT linkage work on Windows - * causing segmentation fault. So simply disable the code for now. - */ - return bld->zero; -#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1282,9 +1294,10 @@ lp_build_sin(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); -#endif } +#endif /* !PIPE_OS_WINDOWS */ + /** * Generate pow(x, y) @@ -1346,7 +1359,6 @@ lp_build_polynomial(struct lp_build_context *bld, unsigned num_coeffs) { const struct lp_type type = bld->type; - LLVMTypeRef float_type = LLVMFloatType(); LLVMValueRef res = NULL; unsigned i; @@ -1358,10 +1370,7 @@ lp_build_polynomial(struct lp_build_context *bld, for (i = num_coeffs; i--; ) { LLVMValueRef coeff; - if (type.length == 1) - coeff = LLVMConstReal(float_type, coeffs[i]); - else - coeff = lp_build_const_vec(type, coeffs[i]); + coeff = lp_build_const_vec(type, coeffs[i]); if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); @@ -1377,17 +1386,31 @@ lp_build_polynomial(struct lp_build_context *bld, /** - * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[ + * Minimax polynomial fit of 2**x, in range [0, 1[ */ const double lp_build_exp2_polynomial[] = { #if EXP_POLY_DEGREE == 5 - 9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3 + 0.999999999690134838155, + 0.583974334321735217258, + 0.164553105719676828492, + 0.0292811063701710962255, + 0.00354944426657875141846, + 0.000296253726543423377365 #elif EXP_POLY_DEGREE == 4 - 1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2 + 1.00000001502262084505, + 0.563586057338685991394, + 0.150436017652442413623, + 0.0243220604213317927308, + 0.0025359088446580436489 #elif EXP_POLY_DEGREE == 3 - 9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2 + 0.999925218562710312959, + 0.695833540494823811697, + 0.226067155427249155588, + 0.0780245226406372992967 #elif EXP_POLY_DEGREE == 2 - 1.0017247, 6.5763628e-1, 3.3718944e-1 + 1.00172476321474503578, + 0.657636275736077639316, + 0.33718943461968720704 #else #error #endif @@ -1421,17 +1444,16 @@ lp_build_exp2_approx(struct lp_build_context *bld, x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0)); x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999)); - /* ipart = int(x - 0.5) */ - ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), ""); - ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); + /* ipart = floor(x) */ + ipart = lp_build_floor(bld, x); /* fpart = x - ipart */ - fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); - fpart = LLVMBuildSub(bld->builder, x, fpart, ""); + fpart = LLVMBuildSub(bld->builder, x, ipart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ + ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); @@ -1472,13 +1494,27 @@ lp_build_exp2(struct lp_build_context *bld, */ const double lp_build_log2_polynomial[] = { #if LOG_POLY_DEGREE == 6 - 3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313 + 3.11578814719469302614, + -3.32419399085241980044, + 2.59883907202499966007, + -1.23152682416275988241, + 0.318212422185251071475, + -0.0344359067839062357313 #elif LOG_POLY_DEGREE == 5 - 2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533 + 2.8882704548164776201, + -2.52074962577807006663, + 1.48116647521213171641, + -0.465725644288844778798, + 0.0596515482674574969533 #elif LOG_POLY_DEGREE == 4 - 2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454 + 2.61761038894603480148, + -1.75647175389045657003, + 0.688243882994381274313, + -0.107254423828329604454 #elif LOG_POLY_DEGREE == 3 - 2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516 + 2.28330284476918490682, + -1.04913055217340124191, + 0.204446009836232697516 #else #error #endif @@ -1558,89 +1594,11 @@ lp_build_log2_approx(struct lp_build_context *bld, } -/** scalar version of above function */ -static void -lp_build_float_log2_approx(struct lp_build_context *bld, - LLVMValueRef x, - LLVMValueRef *p_exp, - LLVMValueRef *p_floor_log2, - LLVMValueRef *p_log2) -{ - const struct lp_type type = bld->type; - LLVMTypeRef float_type = LLVMFloatType(); - LLVMTypeRef int_type = LLVMIntType(type.width); - - LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0); - LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0); - LLVMValueRef one = LLVMConstBitCast(bld->one, int_type); - - LLVMValueRef i = NULL; - LLVMValueRef exp = NULL; - LLVMValueRef mant = NULL; - LLVMValueRef logexp = NULL; - LLVMValueRef logmant = NULL; - LLVMValueRef res = NULL; - - if(p_exp || p_floor_log2 || p_log2) { - /* TODO: optimize the constant case */ - if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); - - assert(type.floating && type.width == 32); - - i = LLVMBuildBitCast(bld->builder, x, int_type, ""); - - /* exp = (float) exponent(x) */ - exp = LLVMBuildAnd(bld->builder, i, expmask, ""); - } - - if(p_floor_log2 || p_log2) { - LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0); - LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0); - logexp = LLVMBuildLShr(bld->builder, exp, c23, ""); - logexp = LLVMBuildSub(bld->builder, logexp, c127, ""); - logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, ""); - } - - if(p_log2) { - /* mant = (float) mantissa(x) */ - mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); - mant = LLVMBuildOr(bld->builder, mant, one, ""); - mant = LLVMBuildBitCast(bld->builder, mant, float_type, ""); - - logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, - Elements(lp_build_log2_polynomial)); - - /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); - - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); - } - - if(p_exp) { - exp = LLVMBuildBitCast(bld->builder, exp, float_type, ""); - *p_exp = exp; - } - - if(p_floor_log2) - *p_floor_log2 = logexp; - - if(p_log2) - *p_log2 = res; -} - - LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef x) { LLVMValueRef res; - if (bld->type.length == 1) { - lp_build_float_log2_approx(bld, x, NULL, NULL, &res); - } - else { - lp_build_log2_approx(bld, x, NULL, NULL, &res); - } + lp_build_log2_approx(bld, x, NULL, NULL, &res); return res; } |