summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: José Fonseca <jfonseca@vmware.com> 2010-05-03 17:06:57 +0100
committer: José Fonseca <jfonseca@vmware.com> 2010-05-04 00:22:16 +0100
commit: a9d1a85f81efacb796578c718ddd399310a2e063 (patch)
tree: c7dcf6006d908bf08466cc254f699814951825c8 /src
parent: 9b02f41cac36286d6838339532c7a95a0615b645 (diff)
gallivm: Newton-Raphson step to improve precision.
Disabled as it doesn't make VS/PSPrecision DCT happy, and it would unnecessarily slow some cases where it is not needed.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714..f372a48846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- /* FIXME: improve precision */
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * XXX: Added precision is not always necessary, so only enable this
+ * when we have a better system in place to track minimum precision.
+ */
+
+#if 0
+ /*
+ * Do one Newton-Raphson step to improve precision:
+ *
+ * x1 = (2 - a * rcp(a)) * rcp(a)
+ */
+
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef rcp_a;
+ LLVMValueRef res;
+
+ rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+
+ res = LLVMBuildMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildSub(bld->builder, two, res, "");
+ res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+
+ return rcp_a;
+#else
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+#endif
+ }
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}