diff options
author | José Fonseca <jfonseca@vmware.com> | 2010-05-03 17:06:57 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2010-05-04 00:22:16 +0100 |
commit | a9d1a85f81efacb796578c718ddd399310a2e063 (patch) | |
tree | c7dcf6006d908bf08466cc254f699814951825c8 /src | |
parent | 9b02f41cac36286d6838339532c7a95a0615b645 (diff) |
gallivm: Newton-Raphson step to improve precision.
Disabled as it doesn't make VS/PSPrecision DCT happy, and it would
unnecessarily slow some cases where it is not needed.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714..f372a48846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
    if(LLVMIsConstant(a))
       return LLVMConstFDiv(bld->one, a);
 
-   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
-      /* FIXME: improve precision */
+   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+      /*
+       * XXX: Added precision is not always necessary, so only enable this
+       * when we have a better system in place to track minimum precision.
+       */
+
+#if 0
+      /*
+       * Do one Newton-Raphson step to improve precision:
+       *
+       *   x1 = (2 - a * rcp(a)) * rcp(a)
+       */
+
+      LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+      LLVMValueRef rcp_a;
+      LLVMValueRef res;
+
+      rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+
+      res = LLVMBuildMul(bld->builder, a, rcp_a, "");
+      res = LLVMBuildSub(bld->builder, two, res, "");
+      res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+
+      return rcp_a;
+#else
       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+#endif
+   }
 
    return LLVMBuildFDiv(bld->builder, bld->one, a, "");
 }