diff options
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/main/imports.c | 112 | ||||
| -rw-r--r-- | src/mesa/main/imports.h | 15 | ||||
| -rw-r--r-- | src/mesa/main/macros.h | 4 | ||||
| -rw-r--r-- | src/mesa/main/nvvertexec.c | 4 | ||||
| -rw-r--r-- | src/mesa/math/m_norm_tmp.h | 26 | ||||
| -rw-r--r-- | src/mesa/swrast/s_aalinetemp.h | 4 | ||||
| -rw-r--r-- | src/mesa/swrast/s_nvfragprog.c | 5 | ||||
| -rw-r--r-- | src/mesa/swrast/s_span.c | 6 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vb_texgen.c | 6 | 
9 files changed, 152 insertions, 30 deletions
| diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index e9d579e764..8474ed4abc 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -1,4 +1,4 @@ -/* $Id: imports.c,v 1.32 2003/03/01 01:50:21 brianp Exp $ */ +/* $Id: imports.c,v 1.33 2003/03/04 16:33:53 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -346,6 +346,7 @@ _mesa_sqrtf( float x )      * then reconstruct the result back into a float      */     num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23); +     return num.f;  #else     return (float) _mesa_sqrtd((double) x); @@ -353,6 +354,115 @@ _mesa_sqrtf( float x )  } +/** + inv_sqrt - A single precision 1/sqrt routine for IEEE format floats. + written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk + and Vesa Karvonen. +*/ +float +_mesa_inv_sqrtf(float n) +{ +#if defined(USE_IEEE) && !defined(DEBUG) +        float r0, x0, y0; +        float r1, x1, y1; +        float r2, x2, y2; +#if 0 /* not used, see below -BP */ +        float r3, x3, y3; +#endif +        union { float f; unsigned int i; } u; +        unsigned int magic; + +        /* +         Exponent part of the magic number - + +         We want to: +         1. subtract the bias from the exponent, +         2. negate it +         3. divide by two (rounding towards -inf) +         4. add the bias back + +         Which is the same as subtracting the exponent from 381 and dividing +         by 2. + +         floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2) +        */ + +        magic = 381 << 23; + +        /* +         Significand part of magic number - + +         With the current magic number, "(magic - u.i) >> 1" will give you: + +         for 1 <= u.f <= 2: 1.25 - u.f / 4 +         for 2 <= u.f <= 4: 1.00 - u.f / 8 + +         This isn't a bad approximation of 1/sqrt.  The maximum difference from +         1/sqrt will be around .06.  After three Newton-Raphson iterations, the +         maximum difference is less than 4.5e-8.  (Which is actually close +         enough to make the following bias academic...) + +         To get a better approximation you can add a bias to the magic +         number.  For example, if you subtract 1/2 of the maximum difference in +         the first approximation (.03), you will get the following function: + +         for 1 <= u.f <= 2:    1.22 - u.f / 4 +         for 2 <= u.f <= 3.76: 0.97 - u.f / 8 +         for 3.76 <= u.f <= 4: 0.72 - u.f / 16 +         (The 3.76 to 4 range is where the result is < .5.) + +         This is the closest possible initial approximation, but with a maximum +         error of 8e-11 after three NR iterations, it is still not perfect.  If +         you subtract 0.0332281 instead of .03, the maximum error will be +         2.5e-11 after three NR iterations, which should be about as close as +         is possible. + +         for 1 <= u.f <= 2:    1.2167719 - u.f / 4 +         for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8 +         for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16 + +        */ + +        magic -= (int)(0.0332281 * (1 << 25)); + +        u.f = n; +        u.i = (magic - u.i) >> 1; + +        /* +         Instead of Newton-Raphson, we use Goldschmidt's algorithm, which +         allows more parallelism.  From what I understand, the parallelism +         comes at the cost of less precision, because it lets error +         accumulate across iterations. +        */ +        x0 = 1.0f; +        y0 = 0.5f * n; +        r0 = u.f; + +        x1 = x0 * r0; +        y1 = y0 * r0 * r0; +        r1 = 1.5f - y1; + +        x2 = x1 * r1; +        y2 = y1 * r1 * r1; +        r2 = 1.5f - y2; + +#if 1 +        return x2 * r2;  /* we can stop here, and be conformant -BP */ +#else +        x3 = x2 * r2; +        y3 = y2 * r2 * r2; +        r3 = 1.5f - y3; + +        return x3 * r3; +#endif +#elif defined(XFree86LOADER) && defined(IN_MODULE) +        return 1.0F / xf86sqrt(n); +#else +        return 1.0F / sqrt(n); +#endif +} + +  double  _mesa_pow(double x, double y)  { diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 354819f43f..73c3b6eb42 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -1,4 +1,4 @@ -/* $Id: imports.h,v 1.16 2003/03/03 21:44:39 brianp Exp $ */ +/* $Id: imports.h,v 1.17 2003/03/04 16:33:53 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -169,6 +169,16 @@ float asm_sqrt (float x);  /*** + *** INV_SQRTF: single-precision inverse square root + ***/ +#if 0 +#define INV_SQRTF(X) _mesa_inv_sqrt(X) +#else +#define INV_SQRTF(X) (1.0F / SQRTF(X))  /* this is faster on a P4 */ +#endif + + +/***   *** LOG2: Log base 2 of float   ***/  #ifdef USE_IEEE @@ -588,6 +598,9 @@ _mesa_sqrtd(double x);  extern float  _mesa_sqrtf(float x); +extern float +_mesa_inv_sqrtf(float x); +  extern double  _mesa_pow(double x, double y); diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index 0be9510995..63bd02bdff 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -1,4 +1,4 @@ -/* $Id: macros.h,v 1.31 2003/03/01 01:50:21 brianp Exp $ */ +/* $Id: macros.h,v 1.32 2003/03/04 16:33:54 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -572,7 +572,7 @@ do {						\  do {						\     GLfloat len = (GLfloat) LEN_SQUARED_3FV(V);	\     if (len) {					\ -      len = (GLfloat) (1.0 / SQRTF(len));	\ +      len = INV_SQRTF(len);			\        (V)[0] = (GLfloat) ((V)[0] * len);	\        (V)[1] = (GLfloat) ((V)[1] * len);	\        (V)[2] = (GLfloat) ((V)[2] * len);	\ diff --git a/src/mesa/main/nvvertexec.c b/src/mesa/main/nvvertexec.c index 72ebcffca9..0702927354 100644 --- a/src/mesa/main/nvvertexec.c +++ b/src/mesa/main/nvvertexec.c @@ -1,4 +1,4 @@ -/* $Id: nvvertexec.c,v 1.2 2003/03/01 01:50:22 brianp Exp $ */ +/* $Id: nvvertexec.c,v 1.3 2003/03/04 16:33:55 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -380,7 +380,7 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)              {                 GLfloat t[4];                 fetch_vector1( &inst->SrcReg[0], machine, t ); -               t[0] = (float) (1.0 / sqrt(fabs(t[0]))); +               t[0] = INV_SQRTF(FABSF(t[0]));                 t[1] = t[2] = t[3] = t[0];                 store_vector4( &inst->DstReg, machine, t );              } diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h index 2da7bbd676..5f24f5a2a2 100644 --- a/src/mesa/math/m_norm_tmp.h +++ b/src/mesa/math/m_norm_tmp.h @@ -1,4 +1,4 @@ -/* $Id: m_norm_tmp.h,v 1.13 2003/03/01 01:50:24 brianp Exp $ */ +/* $Id: m_norm_tmp.h,v 1.14 2003/03/04 16:34:01 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -69,10 +69,10 @@ TAG(transform_normalize_normals)( const GLmatrix *mat,  	 {  	    GLdouble len = tx*tx + ty*ty + tz*tz;  	    if (len > 1e-20) { -	       GLdouble scale = 1.0F / SQRTF(len); -	       out[i][0] = (GLfloat) (tx * scale); -	       out[i][1] = (GLfloat) (ty * scale); -	       out[i][2] = (GLfloat) (tz * scale); +	       GLfloat scale = INV_SQRTF(len); +	       out[i][0] = tx * scale; +	       out[i][1] = ty * scale; +	       out[i][2] = tz * scale;  	    }  	    else {  	       out[i][0] = out[i][1] = out[i][2] = 0; @@ -136,10 +136,10 @@ TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat,  	 {  	    GLdouble len = tx*tx + ty*ty + tz*tz;  	    if (len > 1e-20) { -	       GLdouble scale = 1.0F / SQRTF(len); -	       out[i][0] = (GLfloat) (tx * scale); -	       out[i][1] = (GLfloat) (ty * scale); -	       out[i][2] = (GLfloat) (tz * scale); +	       GLfloat scale = INV_SQRTF(len); +	       out[i][0] = tx * scale; +	       out[i][1] = ty * scale; +	       out[i][2] = tz * scale;  	    }  	    else {  	       out[i][0] = out[i][1] = out[i][2] = 0; @@ -323,10 +323,10 @@ TAG(normalize_normals)( const GLmatrix *mat,  	 const GLfloat x = from[0], y = from[1], z = from[2];  	 GLdouble len = x * x + y * y + z * z;  	 if (len > 1e-50) { -	    len = 1.0F / SQRTF(len); -	    out[i][0] = (GLfloat) (x * len); -	    out[i][1] = (GLfloat) (y * len); -	    out[i][2] = (GLfloat) (z * len); +	    len = INV_SQRTF(len); +	    out[i][0] = x * len; +	    out[i][1] = y * len; +	    out[i][2] = z * len;  	 }  	 else {  	    out[i][0] = x; diff --git a/src/mesa/swrast/s_aalinetemp.h b/src/mesa/swrast/s_aalinetemp.h index 529da48110..9850ff9ac5 100644 --- a/src/mesa/swrast/s_aalinetemp.h +++ b/src/mesa/swrast/s_aalinetemp.h @@ -1,4 +1,4 @@ -/* $Id: s_aalinetemp.h,v 1.22 2003/02/21 21:00:27 brianp Exp $ */ +/* $Id: s_aalinetemp.h,v 1.23 2003/03/04 16:34:02 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -132,7 +132,7 @@ NAME(line)(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1)     line.y1 = v1->win[1];     line.dx = line.x1 - line.x0;     line.dy = line.y1 - line.y0; -   line.len = (GLfloat) sqrt(line.dx * line.dx + line.dy * line.dy); +   line.len = SQRTF(line.dx * line.dx + line.dy * line.dy);     line.halfWidth = 0.5F * ctx->Line.Width;     if (line.len == 0.0 || IS_INF_OR_NAN(line.len)) diff --git a/src/mesa/swrast/s_nvfragprog.c b/src/mesa/swrast/s_nvfragprog.c index 97f65c0479..147b5248d4 100644 --- a/src/mesa/swrast/s_nvfragprog.c +++ b/src/mesa/swrast/s_nvfragprog.c @@ -1,4 +1,4 @@ -/* $Id: s_nvfragprog.c,v 1.5 2003/03/01 01:50:26 brianp Exp $ */ +/* $Id: s_nvfragprog.c,v 1.6 2003/03/04 16:34:03 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -582,8 +582,7 @@ execute_program(GLcontext *ctx, const struct fragment_program *program)              {                 GLfloat a[4], result[4];                 fetch_vector1( &inst->SrcReg[0], machine, a ); -               result[0] = result[1] = result[2] = result[3] -                  = 1.0F / SQRTF(a[0]); +               result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);                 store_vector4( inst, machine, result );              }              break; diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index d408151803..2106b87c5b 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -1,4 +1,4 @@ -/* $Id: s_span.c,v 1.56 2003/03/01 01:50:26 brianp Exp $ */ +/* $Id: s_span.c,v 1.57 2003/03/04 16:34:03 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -309,8 +309,8 @@ compute_lambda(GLfloat dsdx, GLfloat dsdy, GLfloat dtdx, GLfloat dtdy,     GLfloat dvdx = texH * ((t + dtdx) / (q + dqdx) - t * invQ);     GLfloat dudy = texW * ((s + dsdy) / (q + dqdy) - s * invQ);     GLfloat dvdy = texH * ((t + dtdy) / (q + dqdy) - t * invQ); -   GLfloat x = sqrt(dudx * dudx + dvdx * dvdx); -   GLfloat y = sqrt(dudy * dudy + dvdy * dvdy); +   GLfloat x = SQRTF(dudx * dudx + dvdx * dvdx); +   GLfloat y = SQRTF(dudy * dudy + dvdy * dvdy);     GLfloat rho = MAX2(x, y);     GLfloat lambda = LOG2(rho);     return lambda; diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c index 74ad17356b..f2454e6ffb 100644 --- a/src/mesa/tnl/t_vb_texgen.c +++ b/src/mesa/tnl/t_vb_texgen.c @@ -1,4 +1,4 @@ -/* $Id: t_vb_texgen.c,v 1.17 2003/03/01 01:50:27 brianp Exp $ */ +/* $Id: t_vb_texgen.c,v 1.18 2003/03/04 16:34:04 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -113,7 +113,7 @@ static void build_m3( GLfloat f[][3], GLfloat m[],        fz = f[i][2] = u[2] - norm[2] * two_nu;        m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);        if (m[i] != 0.0F) { -	 m[i] = 0.5F / SQRTF(m[i]); +	 m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);        }     }  } @@ -142,7 +142,7 @@ static void build_m2( GLfloat f[][3], GLfloat m[],        fz = f[i][2] = u[2] - norm[2] * two_nu;        m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);        if (m[i] != 0.0F) { -	 m[i] = 0.5F / SQRTF(m[i]); +	 m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);        }     }  } | 
