added _mesa_inv_sqrtf() and INV_SQRTF() (Josh Vanderhoof)

author: Brian Paul <brian.paul@tungstengraphics.com> 2003-03-04 16:33:53 +0000
committer: Brian Paul <brian.paul@tungstengraphics.com> 2003-03-04 16:33:53 +0000
commit: f9b1e5241facc8cf255c258082d5cb5b04783e93 (patch)
tree: ea3bb1859dba42e746b0f14dce5193b81c922ac5 /src/mesa/main/imports.c
parent: 386578c5bcc5c6701a6b9692cdc04cfe4064ca06 (diff)
1 files changed, 111 insertions, 1 deletions
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index e9d579e764..8474ed4abc 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -1,4 +1,4 @@
-/* $Id: imports.c,v 1.32 2003/03/01 01:50:21 brianp Exp $ */
+/* $Id: imports.c,v 1.33 2003/03/04 16:33:53 brianp Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -346,6 +346,7 @@ _mesa_sqrtf( float x )
     * then reconstruct the result back into a float
     */
    num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23);
+
    return num.f;
 #else
    return (float) _mesa_sqrtd((double) x);
@@ -353,6 +354,115 @@ _mesa_sqrtf( float x )
 }
 
 
+/**
+ inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
+ written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
+ and Vesa Karvonen.
+*/
+float
+_mesa_inv_sqrtf(float n)
+{
+#if defined(USE_IEEE) && !defined(DEBUG)
+        float r0, x0, y0;
+        float r1, x1, y1;
+        float r2, x2, y2;
+#if 0 /* not used, see below -BP */
+        float r3, x3, y3;
+#endif
+        union { float f; unsigned int i; } u;
+        unsigned int magic;
+
+        /*
+         Exponent part of the magic number -
+
+         We want to:
+         1. subtract the bias from the exponent,
+         2. negate it
+         3. divide by two (rounding towards -inf)
+         4. add the bias back
+
+         Which is the same as subtracting the exponent from 381 and dividing
+         by 2.
+
+         floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
+        */
+
+        magic = 381 << 23;
+
+        /*
+         Significand part of magic number -
+
+         With the current magic number, "(magic - u.i) >> 1" will give you:
+
+         for 1 <= u.f <= 2: 1.25 - u.f / 4
+         for 2 <= u.f <= 4: 1.00 - u.f / 8
+
+         This isn't a bad approximation of 1/sqrt.  The maximum difference from
+         1/sqrt will be around .06.  After three Newton-Raphson iterations, the
+         maximum difference is less than 4.5e-8.  (Which is actually close
+         enough to make the following bias academic...)
+
+         To get a better approximation you can add a bias to the magic
+         number.  For example, if you subtract 1/2 of the maximum difference in
+         the first approximation (.03), you will get the following function:
+
+         for 1 <= u.f <= 2:    1.22 - u.f / 4
+         for 2 <= u.f <= 3.76: 0.97 - u.f / 8
+         for 3.76 <= u.f <= 4: 0.72 - u.f / 16
+         (The 3.76 to 4 range is where the result is < .5.)
+
+         This is the closest possible initial approximation, but with a maximum
+         error of 8e-11 after three NR iterations, it is still not perfect.  If
+         you subtract 0.0332281 instead of .03, the maximum error will be
+         2.5e-11 after three NR iterations, which should be about as close as
+         is possible.
+
+         for 1 <= u.f <= 2:    1.2167719 - u.f / 4
+         for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
+         for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
+
+        */
+
+        magic -= (int)(0.0332281 * (1 << 25));
+
+        u.f = n;
+        u.i = (magic - u.i) >> 1;
+
+        /*
+         Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
+         allows more parallelism.  From what I understand, the parallelism
+         comes at the cost of less precision, because it lets error
+         accumulate across iterations.
+        */
+        x0 = 1.0f;
+        y0 = 0.5f * n;
+        r0 = u.f;
+
+        x1 = x0 * r0;
+        y1 = y0 * r0 * r0;
+        r1 = 1.5f - y1;
+
+        x2 = x1 * r1;
+        y2 = y1 * r1 * r1;
+        r2 = 1.5f - y2;
+
+#if 1
+        return x2 * r2;  /* we can stop here, and be conformant -BP */
+#else
+        x3 = x2 * r2;
+        y3 = y2 * r2 * r2;
+        r3 = 1.5f - y3;
+
+        return x3 * r3;
+#endif
+#elif defined(XFree86LOADER) && defined(IN_MODULE)
+        return 1.0F / xf86sqrt(n);
+#else
+        return 1.0F / sqrt(n);
+#endif
+}
+
+
 double
 _mesa_pow(double x, double y)
 {
author	Brian Paul <brian.paul@tungstengraphics.com>	2003-03-04 16:33:53 +0000
committer	Brian Paul <brian.paul@tungstengraphics.com>	2003-03-04 16:33:53 +0000
commit	f9b1e5241facc8cf255c258082d5cb5b04783e93 (patch)
tree	ea3bb1859dba42e746b0f14dce5193b81c922ac5 /src/mesa/main/imports.c
parent	386578c5bcc5c6701a6b9692cdc04cfe4064ca06 (diff)