1 files changed, 27 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 20ae958714..f372a48846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
    if(LLVMIsConstant(a))
       return LLVMConstFDiv(bld->one, a);
 
-   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
-      /* FIXME: improve precision */
+   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+      /*
+       * XXX: Added precision is not always necessary, so only enable this
+       * when we have a better system in place to track minimum precision.
+       */
+
+#if 0
+      /*
+       * Do one Newton-Raphson step to improve precision:
+       *
+       *   x1 = (2 - a * rcp(a)) * rcp(a)
+       */
+
+      LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
+      LLVMValueRef rcp_a;
+      LLVMValueRef res;
+
+      rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+
+      res = LLVMBuildMul(bld->builder, a, rcp_a, "");
+      res = LLVMBuildSub(bld->builder, two, res, "");
+      res = LLVMBuildMul(bld->builder, res, rcp_a, "");
+
+      return res;
+#else
       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
+#endif
+   }
 
    return LLVMBuildFDiv(bld->builder, bld->one, a, "");
 }