summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary/gallivm/lp_bld_arit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714..f372a48846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
- /* FIXME: improve precision */
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * XXX: Added precision is not always necessary, so only enable this
+ * when we have a better system in place to track minimum precision.
+ */
+
+#if 0
+ /*
+ * Do one Newton-Raphson step to improve precision:
+ *
+ * x1 = (2 - a * rcp(a)) * rcp(a)
+ */
+
+ LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+ LLVMValueRef rcp_a;
+ LLVMValueRef res;
+
+ rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+
+ res = LLVMBuildMul(bld->builder, a, rcp_a, "");
+ res = LLVMBuildSub(bld->builder, two, res, "");
+ res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+
+ return rcp_a;
+#else
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+#endif
+ }
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}