summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2002-02-17 01:49:31 +0000
committerBrian Paul <brian.paul@tungstengraphics.com>2002-02-17 01:49:31 +0000
commitc14a5a6c6285b29860a722359faa11a16da4eac9 (patch)
tree88c7c4a293e1ed65dfafba29a2e1bc40a9ca6700
parent8baae48fb7e8cdc3490b9e090c1a7e321b243992 (diff)
Optimized the interpolate_texcoords() function:
Use fast approximation to log(). Check for dq==0 to avoid a per-pixel divide.
-rw-r--r--src/mesa/swrast/s_span.c186
1 files changed, 148 insertions, 38 deletions
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index c46064991e..4ed37c6604 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -1,4 +1,4 @@
-/* $Id: s_span.c,v 1.34 2002/02/15 16:27:13 brianp Exp $ */
+/* $Id: s_span.c,v 1.35 2002/02/17 01:49:31 brianp Exp $ */
/*
* Mesa 3-D graphics library
@@ -262,8 +262,58 @@ interpolate_z(GLcontext *ctx, struct sw_span *span)
}
+/*
+ * Return log_base_2(x) / 2.
+ * We divide by two here since we didn't square rho in the triangle function.
+ */
+#ifdef USE_IEEE
+
+#if 0
+/* This is pretty fast, but not accurate enough (only 2 fractional bits).
+ * Based on code from http://www.stereopsis.com/log2.html
+ */
+static INLINE GLfloat HALF_LOG2(GLfloat x)
+{
+ const GLfloat y = x * x * x * x;
+ const GLuint ix = *((GLuint *) &y);
+ const GLuint exp = (ix >> 23) & 0xFF;
+ const GLint log2 = ((GLint) exp) - 127;
+ return (GLfloat) log2 * (0.5 / 4.0); /* 4, because of x^4 above */
+}
+#endif
+
+/* Pretty fast, and accurate.
+ * Based on code from http://www.flipcode.com/totd/
+ */
+static INLINE GLfloat HALF_LOG2(GLfloat val)
+{
+ GLint *exp_ptr = (GLint *) &val;
+ GLint x = *exp_ptr;
+ const GLint log_2 = ((x >> 23) & 255) - 128;
+ x &= ~(255 << 23);
+ x += 127 << 23;
+ *exp_ptr = x;
+ val = ((-1.0f/3) * val + 2) * val - 2.0f/3;
+ return 0.5F * (val + log_2);
+}
+
+#else /* USE_IEEE */
+
+/* Slow, portable solution.
+ * NOTE: log_base_2(x) = log(x) / log(2)
+ * NOTE: 1.442695 = 1/log(2).
+ */
+#define HALF_LOG2(x) ((GLfloat) (log(x) * (1.442695F * 0.5F)))
+
+#endif /* USE_IEEE */
-/* Fill in the span.texcoords array from the interpolation values */
+
+
+/*
+ * Fill in the span.texcoords array from the interpolation values.
+ * XXX We could optimize here for the case when dq = 0. That would
+ * usually be the case when using an orthographic projection.
+ */
static void
interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
{
@@ -275,6 +325,7 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
GLuint u;
for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
if (ctx->Texture.Unit[u]._ReallyEnabled) {
+ const GLfloat rho = span->rho[u];
const GLfloat ds = span->texStep[u][0];
const GLfloat dt = span->texStep[u][1];
const GLfloat dr = span->texStep[u][2];
@@ -284,17 +335,32 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
GLfloat r = span->tex[u][2];
GLfloat q = span->tex[u][3];
GLuint i;
- for (i = 0; i < span->end; i++) {
+ if (dq == 0.0) {
+ /* Ortho projection or polygon's parallel to window X axis */
const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
- span->texcoords[u][i][0] = s * invQ;
- span->texcoords[u][i][1] = t * invQ;
- span->texcoords[u][i][2] = r * invQ;
- span->lambda[u][i] = (GLfloat)
- (log(span->rho[u] * invQ * invQ) * 1.442695F * 0.5F);
- s += ds;
- t += dt;
- r += dr;
- q += dq;
+ const GLfloat lambda = HALF_LOG2(rho * invQ * invQ);
+ for (i = 0; i < span->end; i++) {
+ span->texcoords[u][i][0] = s * invQ;
+ span->texcoords[u][i][1] = t * invQ;
+ span->texcoords[u][i][2] = r * invQ;
+ span->lambda[u][i] = lambda;
+ s += ds;
+ t += dt;
+ r += dr;
+ }
+ }
+ else {
+ for (i = 0; i < span->end; i++) {
+ const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+ span->texcoords[u][i][0] = s * invQ;
+ span->texcoords[u][i][1] = t * invQ;
+ span->texcoords[u][i][2] = r * invQ;
+ span->lambda[u][i] = HALF_LOG2(rho * invQ * invQ);
+ s += ds;
+ t += dt;
+ r += dr;
+ q += dq;
+ }
}
}
}
@@ -314,15 +380,29 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
GLfloat r = span->tex[u][2];
GLfloat q = span->tex[u][3];
GLuint i;
- for (i = 0; i < span->end; i++) {
+ if (dq == 0.0) {
+ /* Ortho projection or polygon's parallel to window X axis */
const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
- span->texcoords[u][i][0] = s * invQ;
- span->texcoords[u][i][1] = t * invQ;
- span->texcoords[u][i][2] = r * invQ;
- s += ds;
- t += dt;
- r += dr;
- q += dq;
+ for (i = 0; i < span->end; i++) {
+ span->texcoords[u][i][0] = s * invQ;
+ span->texcoords[u][i][1] = t * invQ;
+ span->texcoords[u][i][2] = r * invQ;
+ s += ds;
+ t += dt;
+ r += dr;
+ }
+ }
+ else {
+ for (i = 0; i < span->end; i++) {
+ const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+ span->texcoords[u][i][0] = s * invQ;
+ span->texcoords[u][i][1] = t * invQ;
+ span->texcoords[u][i][2] = r * invQ;
+ s += ds;
+ t += dt;
+ r += dr;
+ q += dq;
+ }
}
}
}
@@ -331,6 +411,7 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
else {
if (span->interpMask & SPAN_LAMBDA) {
/* just texture unit 0, with lambda */
+ const GLfloat rho = span->rho[0];
const GLfloat ds = span->texStep[0][0];
const GLfloat dt = span->texStep[0][1];
const GLfloat dr = span->texStep[0][2];
@@ -340,17 +421,32 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
GLfloat r = span->tex[0][2];
GLfloat q = span->tex[0][3];
GLuint i;
- for (i = 0; i < span->end; i++) {
+ if (dq == 0.0) {
+ /* Ortho projection or polygon's parallel to window X axis */
const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
- span->texcoords[0][i][0] = s * invQ;
- span->texcoords[0][i][1] = t * invQ;
- span->texcoords[0][i][2] = r * invQ;
- span->lambda[0][i] = (GLfloat)
- (log(span->rho[0] * invQ * invQ) * 1.442695F * 0.5F);
- s += ds;
- t += dt;
- r += dr;
- q += dq;
+ const GLfloat lambda = HALF_LOG2(rho * invQ * invQ);
+ for (i = 0; i < span->end; i++) {
+ span->texcoords[0][i][0] = s * invQ;
+ span->texcoords[0][i][1] = t * invQ;
+ span->texcoords[0][i][2] = r * invQ;
+ span->lambda[0][i] = lambda;
+ s += ds;
+ t += dt;
+ r += dr;
+ }
+ }
+ else {
+ for (i = 0; i < span->end; i++) {
+ const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+ span->texcoords[0][i][0] = s * invQ;
+ span->texcoords[0][i][1] = t * invQ;
+ span->texcoords[0][i][2] = r * invQ;
+ span->lambda[0][i] = HALF_LOG2(rho * invQ * invQ);
+ s += ds;
+ t += dt;
+ r += dr;
+ q += dq;
+ }
}
span->arrayMask |= SPAN_LAMBDA;
}
@@ -365,15 +461,29 @@ interpolate_texcoords(GLcontext *ctx, struct sw_span *span)
GLfloat r = span->tex[0][2];
GLfloat q = span->tex[0][3];
GLuint i;
- for (i = 0; i < span->end; i++) {
+ if (dq == 0.0) {
+ /* Ortho projection or polygon's parallel to window X axis */
const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
- span->texcoords[0][i][0] = s * invQ;
- span->texcoords[0][i][1] = t * invQ;
- span->texcoords[0][i][2] = r * invQ;
- s += ds;
- t += dt;
- r += dr;
- q += dq;
+ for (i = 0; i < span->end; i++) {
+ span->texcoords[0][i][0] = s * invQ;
+ span->texcoords[0][i][1] = t * invQ;
+ span->texcoords[0][i][2] = r * invQ;
+ s += ds;
+ t += dt;
+ r += dr;
+ }
+ }
+ else {
+ for (i = 0; i < span->end; i++) {
+ const GLfloat invQ = (q == 0.0F) ? 1.0F : (1.0F / q);
+ span->texcoords[0][i][0] = s * invQ;
+ span->texcoords[0][i][1] = t * invQ;
+ span->texcoords[0][i][2] = r * invQ;
+ s += ds;
+ t += dt;
+ r += dr;
+ q += dq;
+ }
}
}
}