summaryrefslogtreecommitdiff
path: root/src/mesa/math
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2000-11-16 21:05:34 +0000
committerKeith Whitwell <keith@tungstengraphics.com>2000-11-16 21:05:34 +0000
commit23caf20169ac38436ee9c13914f1d6aa7cf6bb5e (patch)
tree21307f7bbcaf9ee1e841d7e7bee130570a7b5b95 /src/mesa/math
parent179516673211a2350e479d5321840291f339f5dd (diff)
Move the transform and lighting code to two new directories
math: Provides basic matrix and vector functionality that might be useful to multiple software t&l implementations, and is used by core mesa to manage the Model, Project, etc matrices. tnl: The real transform & lighting code from core mesa, including everything from glVertex3f through vertex buffer handling, transformation, clipping, lighting and handoff to a driver for rasterization. The interfaces of these can be further tightened up, but the basic splitting up of state and code move is done.
Diffstat (limited to 'src/mesa/math')
-rw-r--r--src/mesa/math/m_clip_tmp.h175
-rw-r--r--src/mesa/math/m_copy_tmp.h126
-rw-r--r--src/mesa/math/m_debug_xform.c930
-rw-r--r--src/mesa/math/m_dotprod_tmp.h128
-rw-r--r--src/mesa/math/m_matrix.c1113
-rw-r--r--src/mesa/math/m_matrix.h176
-rw-r--r--src/mesa/math/m_norm_tmp.h413
-rw-r--r--src/mesa/math/m_trans_tmp.h210
-rw-r--r--src/mesa/math/m_translate.c478
-rw-r--r--src/mesa/math/m_translate.h92
-rw-r--r--src/mesa/math/m_vector.c367
-rw-r--r--src/mesa/math/m_vector.h188
-rw-r--r--src/mesa/math/m_xform.c251
-rw-r--r--src/mesa/math/m_xform.h224
-rw-r--r--src/mesa/math/m_xform_tmp.h974
15 files changed, 5845 insertions, 0 deletions
diff --git a/src/mesa/math/m_clip_tmp.h b/src/mesa/math/m_clip_tmp.h
new file mode 100644
index 0000000000..321d3a9e61
--- /dev/null
+++ b/src/mesa/math/m_clip_tmp.h
@@ -0,0 +1,175 @@
+/* $Id: m_clip_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/* KW: a clever asm implementation would nestle integer versions
+ * of the outcode calculation underneath the division. Gcc won't
+ * do this, strangely enough, so I only do the divide in
+ * the case where the cliptest passes. This isn't essential,
+ * and an asm implementation needn't replicate that behaviour.
+ */
+/*
+ * Clip test for 4-component clip-space points read from clip_vec.
+ *
+ * For each point an outcode is built from the six CLIP_*_BIT tests of
+ * x, y and z against w and stored in clipMask[i].
+ *   - outcode != 0: the point is counted as clipped, the outcode is ANDed
+ *     into the running and-mask and ORed into the running or-mask, and
+ *     the projected slot is set to (0, 0, 0, 1).
+ *   - outcode == 0: the projected slot receives (x/w, y/w, z/w, 1/w).
+ *
+ * On return *orMask holds the incoming value ORed with every non-zero
+ * outcode.  *andMask is 0 if fewer than `count' points were clipped,
+ * otherwise the incoming value ANDed with every outcode.
+ *
+ * proj_vec is flagged VEC_SIZE_4, given the element count of clip_vec and
+ * returned.
+ */
+static GLvector4f * _XFORMAPI TAG(cliptest_points4)( GLvector4f *clip_vec,
+                                                     GLvector4f *proj_vec,
+                                                     GLubyte clipMask[],
+                                                     GLubyte *orMask,
+                                                     GLubyte *andMask )
+{
+   const GLuint stride = clip_vec->stride;
+   const GLfloat *from = (GLfloat *)clip_vec->start;
+   const GLuint count = clip_vec->count;
+   GLuint c = 0;   /* number of points with a non-zero outcode */
+   GLfloat (*vProj)[4] = (GLfloat (*)[4])proj_vec->start;
+   GLubyte tmpAndMask = *andMask;
+   GLubyte tmpOrMask = *orMask;
+   GLuint i;
+   STRIDE_LOOP {
+      const GLfloat cx = from[0];
+      const GLfloat cy = from[1];
+      const GLfloat cz = from[2];
+      const GLfloat cw = from[3];
+#if defined(macintosh)
+      /* on powerpc cliptest is 17% faster in this way. */
+      GLuint mask;
+      mask = (((cw < cx) << CLIP_RIGHT_SHIFT));
+      mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
+      mask |= (((cw < cy) << CLIP_TOP_SHIFT));
+      mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
+      mask |= (((cw < cz) << CLIP_FAR_SHIFT));
+      mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
+#else /* !defined(macintosh)) */
+      GLubyte mask = 0;
+      if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;
+      if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;
+      if (-cy + cw < 0) mask |= CLIP_TOP_BIT;
+      if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;
+      if (-cz + cw < 0) mask |= CLIP_FAR_BIT;
+      if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;
+#endif /* defined(macintosh) */
+
+      clipMask[i] = mask;
+      if (mask) {
+         c++;
+         tmpAndMask &= mask;
+         tmpOrMask |= mask;
+         vProj[i][0] = 0; /* no longer required? */
+         vProj[i][1] = 0;
+         vProj[i][2] = 0;
+         vProj[i][3] = 1;
+      } else {
+         /* Only divide for points that survive the clip test. */
+         GLfloat oow = 1.0F / cw;
+         vProj[i][3] = oow;
+         vProj[i][0] = cx * oow;
+         vProj[i][1] = cy * oow;
+         vProj[i][2] = cz * oow;
+      }
+   }
+
+   *orMask = tmpOrMask;
+   *andMask = (GLubyte) (c < count ? 0 : tmpAndMask);
+
+   /* All four components of every element were written above, so the
+    * reported size has to agree with the VEC_SIZE_4 flag.
+    */
+   proj_vec->flags |= VEC_SIZE_4;
+   proj_vec->size = 4;
+   proj_vec->count = clip_vec->count;
+   return proj_vec;
+}
+
+/*
+ * Clip test for 3-component points: x, y and z are compared directly
+ * against -1.0 and 1.0.  The outcode of each point is stored in
+ * clipMask[i], ORed into *orMask and ANDed into *andMask.  Nothing is
+ * projected; gl_vector4f_clean_elem() is applied to proj_vec and
+ * clip_vec itself is returned.
+ */
+static GLvector4f * _XFORMAPI TAG(cliptest_points3)( GLvector4f *clip_vec,
+                                                     GLvector4f *proj_vec,
+                                                     GLubyte clipMask[],
+                                                     GLubyte *orMask,
+                                                     GLubyte *andMask )
+{
+   const GLfloat *from = (GLfloat *)clip_vec->start;
+   const GLuint count = clip_vec->count;
+   const GLuint stride = clip_vec->stride;
+   GLubyte accumAnd = *andMask;
+   GLubyte accumOr = *orMask;
+   GLuint i;
+
+   STRIDE_LOOP {
+      const GLfloat x = from[0];
+      const GLfloat y = from[1];
+      const GLfloat z = from[2];
+      GLubyte outcode = 0;
+
+      if (x > 1.0) {
+         outcode |= CLIP_RIGHT_BIT;
+      } else if (x < -1.0) {
+         outcode |= CLIP_LEFT_BIT;
+      }
+
+      if (y > 1.0) {
+         outcode |= CLIP_TOP_BIT;
+      } else if (y < -1.0) {
+         outcode |= CLIP_BOTTOM_BIT;
+      }
+
+      if (z > 1.0) {
+         outcode |= CLIP_FAR_BIT;
+      } else if (z < -1.0) {
+         outcode |= CLIP_NEAR_BIT;
+      }
+
+      clipMask[i] = outcode;
+      accumOr |= outcode;
+      accumAnd &= outcode;
+   }
+
+   gl_vector4f_clean_elem(proj_vec, count, 3);
+
+   *orMask = accumOr;
+   *andMask = accumAnd;
+   return clip_vec;
+}
+
+/*
+ * Clip test for 2-component points: only x and y are compared against
+ * -1.0 and 1.0.  The outcode of each point is stored in clipMask[i],
+ * ORed into *orMask and ANDed into *andMask.  Nothing is projected;
+ * gl_vector4f_clean_elem() is applied to proj_vec and clip_vec itself
+ * is returned.
+ */
+static GLvector4f * _XFORMAPI TAG(cliptest_points2)( GLvector4f *clip_vec,
+                                                     GLvector4f *proj_vec,
+                                                     GLubyte clipMask[],
+                                                     GLubyte *orMask,
+                                                     GLubyte *andMask )
+{
+   const GLfloat *from = (GLfloat *)clip_vec->start;
+   const GLuint count = clip_vec->count;
+   const GLuint stride = clip_vec->stride;
+   GLubyte accumAnd = *andMask;
+   GLubyte accumOr = *orMask;
+   GLuint i;
+
+   STRIDE_LOOP {
+      const GLfloat x = from[0];
+      const GLfloat y = from[1];
+      GLubyte outcode = 0;
+
+      if (x > 1.0) {
+         outcode |= CLIP_RIGHT_BIT;
+      } else if (x < -1.0) {
+         outcode |= CLIP_LEFT_BIT;
+      }
+
+      if (y > 1.0) {
+         outcode |= CLIP_TOP_BIT;
+      } else if (y < -1.0) {
+         outcode |= CLIP_BOTTOM_BIT;
+      }
+
+      clipMask[i] = outcode;
+      accumOr |= outcode;
+      accumAnd &= outcode;
+   }
+
+   gl_vector4f_clean_elem(proj_vec, count, 3);
+
+   *orMask = accumOr;
+   *andMask = accumAnd;
+   return clip_vec;
+}
+
+
+/* Install the cliptest routines of this instantiation into gl_clip_tab,
+ * which is indexed by the number of components per point (2, 3 or 4).
+ */
+static void TAG(init_c_cliptest)( void )
+{
+   gl_clip_tab[2] = TAG(cliptest_points2);
+   gl_clip_tab[3] = TAG(cliptest_points3);
+   gl_clip_tab[4] = TAG(cliptest_points4);
+}
diff --git a/src/mesa/math/m_copy_tmp.h b/src/mesa/math/m_copy_tmp.h
new file mode 100644
index 0000000000..b328537faf
--- /dev/null
+++ b/src/mesa/math/m_copy_tmp.h
@@ -0,0 +1,126 @@
+/* $Id: m_copy_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/* Generates TAG2(copy, BITS): walks the elements of f with STRIDE_LOOP
+ * and, for each element admitted by CULL_CHECK, copies the components
+ * selected by BITS (bit 0 -> [0], bit 1 -> [1], bit 2 -> [2],
+ * bit 3 -> [3]) into the matching element of to.  BITS == 0 yields a
+ * function that copies nothing.
+ */
+#define COPY_FUNC( BITS )                                             \
+static void TAG2(copy, BITS)(GLvector4f *to, const GLvector4f *f,     \
+                             const GLubyte mask[] )                   \
+{                                                                     \
+   GLuint count = f->count;                                           \
+   GLuint stride = f->stride;                                         \
+   GLfloat *from = f->start;                                          \
+   GLfloat (*rows)[4] = (GLfloat (*)[4])to->start;                    \
+   GLuint i;                                                          \
+   (void) mask;                                                       \
+                                                                      \
+   if (BITS) {                                                        \
+      STRIDE_LOOP {                                                   \
+         CULL_CHECK {                                                 \
+            GLfloat *dst = rows[i];                                   \
+            if (BITS&1) dst[0] = from[0];                             \
+            if (BITS&2) dst[1] = from[1];                             \
+            if (BITS&4) dst[2] = from[2];                             \
+            if (BITS&8) dst[3] = from[3];                             \
+         }                                                            \
+      }                                                               \
+   }                                                                  \
+}
+
+
+
+/* static void TAG2(clean, BITS)(GLvector4f *to ) */
+/* { */
+/* GLfloat (*t)[4] = to->data; */
+/* GLuint i; */
+
+/* if (BITS) */
+/* for (i = 0 ; i < VB_SIZE ; i++) { */
+/* if (BITS&1) t[i][0] = 0; */
+/* if (BITS&2) t[i][1] = 0; */
+/* if (BITS&4) t[i][2] = 0; */
+/* if (BITS&8) t[i][3] = 1; */
+/* } */
+/* to->flags &= ~BITS; */
+/* } */
+
+
+/* We got them all here:
+ */
+COPY_FUNC( 0x0 ) /* noop */
+COPY_FUNC( 0x1 )
+COPY_FUNC( 0x2 )
+COPY_FUNC( 0x3 )
+COPY_FUNC( 0x4 )
+COPY_FUNC( 0x5 )
+COPY_FUNC( 0x6 )
+COPY_FUNC( 0x7 )
+COPY_FUNC( 0x8 )
+COPY_FUNC( 0x9 )
+COPY_FUNC( 0xa )
+COPY_FUNC( 0xb )
+COPY_FUNC( 0xc )
+COPY_FUNC( 0xd )
+COPY_FUNC( 0xe )
+COPY_FUNC( 0xf )
+
+static void TAG2(init_copy, 0 ) ( void )
+{
+ gl_copy_tab[IDX][0x0] = TAG2(copy, 0x0);
+ gl_copy_tab[IDX][0x1] = TAG2(copy, 0x1);
+ gl_copy_tab[IDX][0x2] = TAG2(copy, 0x2);
+ gl_copy_tab[IDX][0x3] = TAG2(copy, 0x3);
+ gl_copy_tab[IDX][0x4] = TAG2(copy, 0x4);
+ gl_copy_tab[IDX][0x5] = TAG2(copy, 0x5);
+ gl_copy_tab[IDX][0x6] = TAG2(copy, 0x6);
+ gl_copy_tab[IDX][0x7] = TAG2(copy, 0x7);
+ gl_copy_tab[IDX][0x8] = TAG2(copy, 0x8);
+ gl_copy_tab[IDX][0x9] = TAG2(copy, 0x9);
+ gl_copy_tab[IDX][0xa] = TAG2(copy, 0xa);
+ gl_copy_tab[IDX][0xb] = TAG2(copy, 0xb);
+ gl_copy_tab[IDX][0xc] = TAG2(copy, 0xc);
+ gl_copy_tab[IDX][0xd] = TAG2(copy, 0xd);
+ gl_copy_tab[IDX][0xe] = TAG2(copy, 0xe);
+ gl_copy_tab[IDX][0xf] = TAG2(copy, 0xf);
+
+/* gl_clean_tab[IDX][0x0] = TAG2(clean, 0x0); */
+/* gl_clean_tab[IDX][0x1] = TAG2(clean, 0x1); */
+/* gl_clean_tab[IDX][0x2] = TAG2(clean, 0x2); */
+/* gl_clean_tab[IDX][0x3] = TAG2(clean, 0x3); */
+/* gl_clean_tab[IDX][0x4] = TAG2(clean, 0x4); */
+/* gl_clean_tab[IDX][0x5] = TAG2(clean, 0x5); */
+/* gl_clean_tab[IDX][0x6] = TAG2(clean, 0x6); */
+/* gl_clean_tab[IDX][0x7] = TAG2(clean, 0x7); */
+/* gl_clean_tab[IDX][0x8] = TAG2(clean, 0x8); */
+/* gl_clean_tab[IDX][0x9] = TAG2(clean, 0x9); */
+/* gl_clean_tab[IDX][0xa] = TAG2(clean, 0xa); */
+/* gl_clean_tab[IDX][0xb] = TAG2(clean, 0xb); */
+/* gl_clean_tab[IDX][0xc] = TAG2(clean, 0xc); */
+/* gl_clean_tab[IDX][0xd] = TAG2(clean, 0xd); */
+/* gl_clean_tab[IDX][0xe] = TAG2(clean, 0xe); */
+/* gl_clean_tab[IDX][0xf] = TAG2(clean, 0xf); */
+}
diff --git a/src/mesa/math/m_debug_xform.c b/src/mesa/math/m_debug_xform.c
new file mode 100644
index 0000000000..5041fc4ee0
--- /dev/null
+++ b/src/mesa/math/m_debug_xform.c
@@ -0,0 +1,930 @@
+/* $Id: m_debug_xform.c,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Updated for P6 architecture by Gareth Hughes.
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "mem.h"
+
+#include "m_debug_xform.h"
+#include "m_matrix.h"
+#include "m_xform.h"
+
+
+#ifdef DEBUG /* This code only used for debugging */
+
+
+/* Comment this out to deactivate the cycle counter.
+ * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
+ * (hope, you don't try to debug Mesa on a 386 ;)
+ */
+#if defined(__GNUC__) && defined(__i386__) && defined(USE_X86_ASM)
+#define RUN_XFORM_BENCHMARK
+#endif
+
+#define TEST_COUNT 128 /* size of the tested vector array */
+
+#define REQUIRED_PRECISION 10 /* allow 14 of the 24 bits to miss */
+#define MAX_PRECISION 24 /* max. precision possible */
+
+
+#ifdef RUN_XFORM_BENCHMARK
+/* Overhead of profiling counter in cycles. Automatically adjusted to
+ * your machine at run time - counter initialization should give very
+ * consistent results.
+ */
+static int need_counter = 1;
+static long counter_overhead = 0;
+
+/* Modify the number of tests if you like.
+ * We take the minimum of all results, because every error should be
+ * positive (time used by other processes, task switches etc).
+ * It is assumed that all calculations are done in the cache.
+ */
+
+#if 1 /* PPro, PII, PIII version */
+
+/* Profiling on the P6 architecture requires a little more work, due to
+ * the internal out-of-order execution. We must perform a serializing
+ * 'cpuid' instruction before and after the 'rdtsc' instructions to make
+ * sure no other uops are executed when we sample the timestamp counter.
+ */
+#define INIT_COUNTER() \
+ do { \
+ int cycle_i; \
+ counter_overhead = LONG_MAX; \
+ for ( cycle_i = 0 ; cycle_i < 4 ; cycle_i++ ) { \
+ long cycle_tmp1 = 0, cycle_tmp2 = 0; \
+ __asm__ ( "push %%ebx \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "rdtsc \n" \
+ "mov %%eax, %0 \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "pop %%ebx \n" \
+ "push %%ebx \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "rdtsc \n" \
+ "mov %%eax, %1 \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "pop %%ebx \n" \
+ : "=m" (cycle_tmp1), "=m" (cycle_tmp2) \
+ : : "eax", "ecx", "edx" ); \
+ if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \
+ counter_overhead = cycle_tmp2 - cycle_tmp1; \
+ } \
+ } \
+ } while (0)
+
+#define BEGIN_RACE(x) \
+ x = LONG_MAX; \
+ for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \
+ long cycle_tmp1 = 0, cycle_tmp2 = 0; \
+ __asm__ ( "push %%ebx \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "rdtsc \n" \
+ "mov %%eax, %0 \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "pop %%ebx \n" \
+ : "=m" (cycle_tmp1) \
+ : : "eax", "ecx", "edx" );
+
+#define END_RACE(x) \
+ __asm__ ( "push %%ebx \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "rdtsc \n" \
+ "mov %%eax, %0 \n" \
+ "xor %%eax, %%eax \n" \
+ "cpuid \n" \
+ "pop %%ebx \n" \
+ : "=m" (cycle_tmp2) \
+ : : "eax", "ecx", "edx" ); \
+ if ( x > (cycle_tmp2 - cycle_tmp1) ) { \
+ x = cycle_tmp2 - cycle_tmp1; \
+ } \
+ } \
+ x -= counter_overhead;
+
+#else /* PPlain, PMMX version */
+
+/* To ensure accurate results, we stall the pipelines with the
+ * non-pairable 'cdq' instruction. This ensures all the code being
+ * profiled is complete when the 'rdtsc' instruction executes.
+ */
+#define INIT_COUNTER(x) \
+ do { \
+ int cycle_i; \
+ x = LONG_MAX; \
+ for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) { \
+ long cycle_tmp1, cycle_tmp2, dummy; \
+ __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
+ __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
+ if ( x > (cycle_tmp2 - cycle_tmp1) ) \
+ x = cycle_tmp2 - cycle_tmp1; \
+ } \
+ } while (0)
+
+#define BEGIN_RACE(x) \
+ x = LONG_MAX; \
+ for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \
+ long cycle_tmp1, cycle_tmp2, dummy; \
+ __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
+ __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );
+
+
+#define END_RACE(x) \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "cdq" ); \
+ __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
+ if ( x > (cycle_tmp2 - cycle_tmp1) ) \
+ x = cycle_tmp2 - cycle_tmp1; \
+ } \
+ x -= counter_overhead;
+
+#endif
+
+#else
+
+#define BEGIN_RACE(x)
+#define END_RACE(x)
+
+#endif
+
+
+static char *mesa_profile = NULL;
+
+
+enum { NIL=0, ONE=1, NEG=-1, VAR=2 };
+
+static int m_general[16] = {
+ VAR, VAR, VAR, VAR,
+ VAR, VAR, VAR, VAR,
+ VAR, VAR, VAR, VAR,
+ VAR, VAR, VAR, VAR
+};
+static int m_identity[16] = {
+ ONE, NIL, NIL, NIL,
+ NIL, ONE, NIL, NIL,
+ NIL, NIL, ONE, NIL,
+ NIL, NIL, NIL, ONE
+};
+static int m_2d[16] = {
+ VAR, VAR, NIL, VAR,
+ VAR, VAR, NIL, VAR,
+ NIL, NIL, ONE, NIL,
+ NIL, NIL, NIL, ONE
+};
+static int m_2d_no_rot[16] = {
+ VAR, NIL, NIL, VAR,
+ NIL, VAR, NIL, VAR,
+ NIL, NIL, ONE, NIL,
+ NIL, NIL, NIL, ONE
+};
+static int m_3d[16] = {
+ VAR, VAR, VAR, VAR,
+ VAR, VAR, VAR, VAR,
+ VAR, VAR, VAR, VAR,
+ NIL, NIL, NIL, ONE
+};
+static int m_3d_no_rot[16] = {
+ VAR, NIL, NIL, VAR,
+ NIL, VAR, NIL, VAR,
+ NIL, NIL, VAR, VAR,
+ NIL, NIL, NIL, ONE
+};
+static int m_perspective[16] = {
+ VAR, NIL, VAR, NIL,
+ NIL, VAR, VAR, NIL,
+ NIL, NIL, VAR, VAR,
+ NIL, NIL, NEG, NIL
+};
+static int *templates[7] = {
+ m_general,
+ m_identity,
+ m_3d_no_rot,
+ m_perspective,
+ m_2d,
+ m_2d_no_rot,
+ m_3d
+};
+static int mtypes[7] = {
+ MATRIX_GENERAL,
+ MATRIX_IDENTITY,
+ MATRIX_3D_NO_ROT,
+ MATRIX_PERSPECTIVE,
+ MATRIX_2D,
+ MATRIX_2D_NO_ROT,
+ MATRIX_3D
+};
+static char *mstrings[7] = {
+ "MATRIX_GENERAL",
+ "MATRIX_IDENTITY",
+ "MATRIX_3D_NO_ROT",
+ "MATRIX_PERSPECTIVE",
+ "MATRIX_2D",
+ "MATRIX_2D_NO_ROT",
+ "MATRIX_3D"
+};
+
+
+
+static int m_norm_identity[16] = {
+ ONE, NIL, NIL, NIL,
+ NIL, ONE, NIL, NIL,
+ NIL, NIL, ONE, NIL,
+ NIL, NIL, NIL, NIL
+};
+static int m_norm_general[16] = {
+ VAR, VAR, VAR, NIL,
+ VAR, VAR, VAR, NIL,
+ VAR, VAR, VAR, NIL,
+ NIL, NIL, NIL, NIL
+};
+static int m_norm_no_rot[16] = {
+ VAR, NIL, NIL, NIL,
+ NIL, VAR, NIL, NIL,
+ NIL, NIL, VAR, NIL,
+ NIL, NIL, NIL, NIL
+};
+static int *norm_templates[8] = {
+ m_norm_no_rot,
+ m_norm_no_rot,
+ m_norm_no_rot,
+ m_norm_general,
+ m_norm_general,
+ m_norm_general,
+ m_norm_identity,
+ m_norm_identity
+};
+static int norm_types[8] = {
+ NORM_TRANSFORM_NO_ROT,
+ NORM_TRANSFORM_NO_ROT | NORM_RESCALE,
+ NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE,
+ NORM_TRANSFORM,
+ NORM_TRANSFORM | NORM_RESCALE,
+ NORM_TRANSFORM | NORM_NORMALIZE,
+ NORM_RESCALE,
+ NORM_NORMALIZE
+};
+static int norm_scale_types[8] = { /* rescale factor */
+ NIL, /* NIL disables rescaling */
+ VAR,
+ NIL,
+ NIL,
+ VAR,
+ NIL,
+ VAR,
+ NIL
+};
+static int norm_normalize_types[8] = { /* normalizing ?? (no = 0) */
+ 0,
+ 0,
+ 1,
+ 0,
+ 0,
+ 1,
+ 0,
+ 1
+};
+static char *norm_strings[8] = {
+ "NORM_TRANSFORM_NO_ROT",
+ "NORM_TRANSFORM_NO_ROT | NORM_RESCALE",
+ "NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE",
+ "NORM_TRANSFORM",
+ "NORM_TRANSFORM | NORM_RESCALE",
+ "NORM_TRANSFORM | NORM_NORMALIZE",
+ "NORM_RESCALE",
+ "NORM_NORMALIZE"
+};
+
+
+
+/* ================================================================
+ * Helper functions
+ */
+
+/* Returns a pseudo-random value in [-1, 1], derived from rand() and
+ * truncated to a multiple of 1/8192 before being scaled to that range.
+ */
+static GLfloat rnd( void )
+{
+   const GLfloat steps = (GLfloat)(1 << 13);
+   GLfloat unit = (GLfloat)rand() / (GLfloat)RAND_MAX;
+
+   /* Drop everything below 1/steps. */
+   unit = (GLfloat)(GLint)(unit * steps) / steps;
+
+   return unit * 2.0 - 1.0;
+}
+
+/* Estimates how many leading significand bits a and b have in common.
+ *
+ * Returns MAX_PRECISION when a - b is exactly zero, 0 when exactly one
+ * of them is zero, and otherwise the smaller of the two binary
+ * exponents minus the binary exponent of the difference.
+ */
+static int significand_match( GLfloat a, GLfloat b )
+{
+   const GLfloat diff = a - b;
+   int exp_a, exp_b, exp_diff;
+
+   if ( diff == 0.0F )
+      return MAX_PRECISION;		/* Exact match */
+
+   /* It would probably be better to check if the non-zero number is
+    * denormalized and return the index of the highest set bit here.
+    */
+   if ( a == 0.0F || b == 0.0F )
+      return 0;
+
+   /* Only the exponents are of interest; the mantissas are discarded. */
+   frexp( a, &exp_a );
+   frexp( b, &exp_b );
+   frexp( diff, &exp_diff );
+
+   return ( exp_a < exp_b ? exp_a : exp_b ) - exp_diff;
+}
+
+
+
+/* ================================================================
+ * Reference transformations
+ */
+
+/* Reference 4x4 transform: multiplies every 4-component element of src
+ * (walked with src->stride) by the 16-float matrix mat->m and stores the
+ * result in consecutive GLfloat[4] slots starting at dst->start.
+ * clipmask and flag are accepted but ignored.
+ */
+static void ref_transform( GLvector4f *dst,
+                           const GLmatrix *mat,
+                           const GLvector4f *src,
+                           const GLubyte *clipmask,
+                           const GLubyte flag )
+{
+   const GLfloat *m = mat->m;
+   GLfloat (*d)[4] = (GLfloat (*)[4])dst->start;
+   GLfloat *s = (GLfloat *)src->start;
+   int i;
+
+   (void) clipmask;
+   (void) flag;
+
+   for ( i = 0 ; i < src->count ;
+         i++, s = (GLfloat *)((char *)s + src->stride) ) {
+      const GLfloat x = s[0];
+      const GLfloat y = s[1];
+      const GLfloat z = s[2];
+      const GLfloat w = s[3];
+      GLfloat *res = d[i];
+
+      res[0] = m[0]*x + m[4]*y + m[ 8]*z + m[12]*w;
+      res[1] = m[1]*x + m[5]*y + m[ 9]*z + m[13]*w;
+      res[2] = m[2]*x + m[6]*y + m[10]*z + m[14]*w;
+      res[3] = m[3]*x + m[7]*y + m[11]*z + m[15]*w;
+   }
+}
+
+/* Reference normal transform with rescaling: every 3-component element
+ * of in (walked with in->stride) is multiplied by the upper-left 3x3 part
+ * of mat->inv, scaled by `scale' and stored in consecutive GLfloat[3]
+ * slots starting at dest->start.  lengths and mask are ignored.
+ */
+static void ref_norm_transform_rescale( const GLmatrix *mat,
+                                        GLfloat scale,
+                                        const GLvector3f *in,
+                                        const GLfloat *lengths,
+                                        const GLubyte mask[],
+                                        GLvector3f *dest )
+{
+   const GLfloat *m = mat->inv;
+   const GLfloat *s = in->start;
+   GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+   int i;
+
+   (void) lengths;
+   (void) mask;
+
+   for ( i = 0 ; i < in->count ;
+         i++, s = (GLfloat *)((char *)s + in->stride) ) {
+      const GLfloat x = s[0];
+      const GLfloat y = s[1];
+      const GLfloat z = s[2];
+      const GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z;
+      const GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z;
+      const GLfloat tz = m[8]*x + m[9]*y + m[10]*z;
+      GLfloat *res = out[i];
+
+      res[0] = tx * scale;
+      res[1] = ty * scale;
+      res[2] = tz * scale;
+   }
+}
+
+/* Reference normal transform with normalization: every 3-component
+ * element of in (walked with in->stride) is multiplied by the upper-left
+ * 3x3 part of mat->inv and then scaled.
+ *
+ * When lengths is NULL the factor is 1/sqrt(squared length) of the
+ * transformed vector, or the result is set to zero if the squared length
+ * is not above 1e-20.  When lengths is given, lengths[i] is used as the
+ * factor.  The incoming `scale' value is never read; mask is ignored.
+ */
+static void ref_norm_transform_normalize( const GLmatrix *mat,
+                                          GLfloat scale,
+                                          const GLvector3f *in,
+                                          const GLfloat *lengths,
+                                          const GLubyte mask[],
+                                          GLvector3f *dest )
+{
+   const GLfloat *m = mat->inv;
+   const GLfloat *s = in->start;
+   GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+   int i;
+
+   (void) mask;
+
+   for ( i = 0 ; i < in->count ;
+         i++, s = (GLfloat *)((char *)s + in->stride) ) {
+      const GLfloat x = s[0];
+      const GLfloat y = s[1];
+      const GLfloat z = s[2];
+      const GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z;
+      const GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z;
+      const GLfloat tz = m[8]*x + m[9]*y + m[10]*z;
+
+      if ( lengths ) {
+         scale = lengths[i];
+      } else {
+         const GLfloat len = tx*tx + ty*ty + tz*tz;
+
+         if ( !( len > 1e-20 ) ) {
+            out[i][0] = out[i][1] = out[i][2] = 0;
+            continue;
+         }
+
+         /* Hmmm, don't know how we could test the precalculated
+          * length case...
+          */
+         scale = 1.0 / sqrt( len );
+      }
+
+      out[i][0] = tx * scale;
+      out[i][1] = ty * scale;
+      out[i][2] = tz * scale;
+   }
+}
+
+
+
+/* ================================================================
+ * Vertex transformation tests
+ */
+
+/* Ensure our arrays are correctly aligned.
+ */
+#if defined(__GNUC__)
+#define ALIGN16(x) x __attribute__ ((aligned (16)))
+#else
+#define ALIGN16(x) x
+#endif
+static GLfloat ALIGN16(s[TEST_COUNT][5]);
+static GLfloat ALIGN16(d[TEST_COUNT][4]);
+static GLfloat ALIGN16(r[TEST_COUNT][4]);
+
+/*
+ * Runs one transform routine over TEST_COUNT random points and compares
+ * its output (file-scope array d) with ref_transform() (array r).
+ *
+ *   func    routine under test
+ *   psize   number of leading source components given random values;
+ *           the others keep their (0, 0, 0, 1) defaults.  Values above 4
+ *           are reported through gl_problem() and rejected.
+ *   mtype   index into templates[] and mtypes[]; selects which matrix
+ *           entries are forced to 0, 1 or -1
+ *   masked  non-zero: func is given a mask that flags every odd index
+ *           together with flag 1, and the flagged results are not
+ *           compared
+ *   cycles  timing slot handed to BEGIN_RACE/END_RACE when mesa_profile
+ *           is set
+ *
+ * Returns 1 if every compared component agrees to at least
+ * REQUIRED_PRECISION bits, otherwise prints the offending vertex and
+ * returns 0.  The temporary matrix is released on both paths.
+ */
+static int test_transform_function( transform_func func, int psize, int mtype,
+                                    int masked, long *cycles )
+{
+   GLvector4f source[1], dest[1], ref[1];
+   GLmatrix mat[1];
+   GLfloat *m;
+   GLubyte mask[TEST_COUNT];
+   int i, j, k;
+#ifdef RUN_XFORM_BENCHMARK
+   int cycle_i;                /* the counter for the benchmarks we run */
+#endif
+
+   (void) cycles;
+
+   if ( psize > 4 ) {
+      gl_problem( NULL, "test_transform_function called with psize > 4\n" );
+      return 0;
+   }
+
+   mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
+   mat->type = mtypes[mtype];
+
+   m = mat->m;
+
+   m[0] = 63.0; m[4] = 43.0; m[ 8] = 29.0; m[12] = 43.0;
+   m[1] = 55.0; m[5] = 17.0; m[ 9] = 31.0; m[13] = 7.0;
+   m[2] = 44.0; m[6] = 9.0; m[10] = 7.0; m[14] = 3.0;
+   m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] = 9.0;
+
+   /* Overwrite the entries the template pins to a constant; VAR entries
+    * keep the arbitrary values assigned above.
+    */
+   for ( i = 0 ; i < 4 ; i++ ) {
+      for ( j = 0 ; j < 4 ; j++ ) {
+         switch ( templates[mtype][i * 4 + j] ) {
+         case NIL:
+            m[j * 4 + i] = 0.0;
+            break;
+         case ONE:
+            m[j * 4 + i] = 1.0;
+            break;
+         case NEG:
+            m[j * 4 + i] = -1.0;
+            break;
+         case VAR:
+            break;
+         default:
+            abort();
+         }
+      }
+   }
+
+   for ( i = 0 ; i < TEST_COUNT ; i++) {
+      mask[i] = i % 2;                /* mask every 2nd element */
+      d[i][0] = s[i][0] = 0.0;
+      d[i][1] = s[i][1] = 0.0;
+      d[i][2] = s[i][2] = 0.0;
+      d[i][3] = s[i][3] = 1.0;
+      for ( j = 0 ; j < psize ; j++ )
+         s[i][j] = rnd();
+   }
+
+   source->data = (GLfloat(*)[4])s;
+   source->start = (GLfloat *)s;
+   source->count = TEST_COUNT;
+   source->stride = sizeof(s[0]);
+   source->size = 4;
+   source->flags = 0;
+
+   dest->data = (GLfloat(*)[4])d;
+   dest->start = (GLfloat *)d;
+   dest->count = TEST_COUNT;
+   dest->stride = sizeof(float[4]);
+   dest->size = 0;
+   dest->flags = 0;
+
+   ref->data = (GLfloat(*)[4])r;
+   ref->start = (GLfloat *)r;
+   ref->count = TEST_COUNT;
+   ref->stride = sizeof(float[4]);
+   ref->size = 0;
+   ref->flags = 0;
+
+   ref_transform( ref, mat, source, NULL, 0 );
+
+   if ( mesa_profile ) {
+      if ( masked ) {
+         BEGIN_RACE( *cycles );
+         func( dest, mat->m, source, mask, 1 );
+         END_RACE( *cycles );
+      } else {
+         BEGIN_RACE( *cycles );
+         func( dest, mat->m, source, NULL, 0 );
+         END_RACE( *cycles );
+      }
+   }
+   else {
+      if ( masked ) {
+         func( dest, mat->m, source, mask, 1 );
+      } else {
+         func( dest, mat->m, source, NULL, 0 );
+      }
+   }
+
+   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
+      if ( masked && (mask[i] & 1) )
+         continue;
+
+      for ( j = 0 ; j < 4 ; j++ ) {
+         if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
+            printf( "-----------------------------\n" );
+            printf( "(i = %i, j = %i)\n", i, j );
+            /* Dump all four components of the failing vertex. */
+            for ( k = 0 ; k < 4 ; k++ ) {
+               printf( "%f \t %f \t [diff = %e - %i bit missed]\n",
+                       d[i][k], r[i][k], r[i][k]-d[i][k],
+                       MAX_PRECISION - significand_match( d[i][k], r[i][k] ) );
+            }
+            /* Do not leak the matrix on the failure path. */
+            ALIGN_FREE( mat->m );
+            return 0;
+         }
+      }
+   }
+
+   ALIGN_FREE( mat->m );
+   return 1;
+}
+
+void gl_test_all_transform_functions( char *description ) /* description: label echoed in the printed and reported messages */
+{
+ int masked, psize, mtype;
+ long benchmark_tab[2][4][7]; /* cycle counts, indexed [cma][psize-1][mtype] */
+ static int first_time = 1;
+
+ if ( first_time ) { /* read MESA_PROFILE only on the first call */
+ first_time = 0;
+ mesa_profile = getenv( "MESA_PROFILE" );
+ }
+
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile ) {
+ if ( need_counter ) { /* measure the counter overhead once */
+ need_counter = 0;
+ INIT_COUNTER();
+ printf( "counter overhead: %ld cycles\n\n", counter_overhead );
+ }
+ printf( "transform results after hooking in %s functions:\n", description );
+ }
+#endif
+
+ for ( masked = 0 ; masked <= 1 ; masked++ ) {
+ int cma = masked ? 1 : 0; /* first index into gl_transform_tab and benchmark_tab */
+ char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
+
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile ) {
+ printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
+ for ( psize = 1 ; psize <= 4 ; psize++ ) {
+ printf( " p%d\t", psize );
+ }
+ printf( "\n--------------------------------------------------------\n" );
+ }
+#endif
+
+ for ( mtype = 0 ; mtype < 7 ; mtype++ ) { /* one pass per entry of mtypes[] */
+ for ( psize = 1 ; psize <= 4 ; psize++ ) {
+ transform_func func = gl_transform_tab[cma][psize][mtypes[mtype]];
+ long *cycles = &(benchmark_tab[cma][psize-1][mtype]);
+
+ if ( test_transform_function( func, psize, mtype,
+ masked, cycles ) == 0 ) { /* 0: the routine failed the comparison */
+ char buf[100];
+ sprintf( buf, "gl_transform_tab[%s][%d][%s] failed test (%s)",
+ cmastring, psize, mstrings[mtype], description );
+ gl_problem( NULL, buf );
+ }
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile )
+ printf( " %li\t", benchmark_tab[cma][psize-1][mtype] );
+#endif
+ }
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile )
+ printf( " | [%s]\n", mstrings[mtype] );
+#endif
+ }
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile )
+ printf( "\n" );
+#endif
+ }
+}
+
+
+
+/* ================================================================
+ * Normal transformation tests
+ */
+
+/*
+ * Runs one normal-transform routine over TEST_COUNT random normals and
+ * compares its output with the reference implementations.
+ *
+ *   func    routine under test
+ *   mtype   index into norm_templates[], norm_scale_types[] and
+ *           norm_normalize_types[]
+ *   masked  non-zero: func is given a mask that flags every odd index,
+ *           and only the flagged results are compared
+ *   cycles  timing slot handed to BEGIN_RACE/END_RACE when mesa_profile
+ *           is set
+ *
+ * func is called twice: once without precalculated lengths (dest, checked
+ * against ref) and once with length[] = 1/|source normal| (dest2).  The
+ * second result is checked against ref2 only for the normalizing types.
+ *
+ * Returns 1 if every compared component agrees to at least
+ * REQUIRED_PRECISION bits, otherwise prints the offending normal and
+ * returns 0.  The temporary matrix is released on both paths.
+ */
+static int test_norm_function( normal_func func, int mtype,
+                               int masked, long *cycles )
+{
+   GLvector3f source[1], dest[1], dest2[1], ref[1], ref2[1];
+   GLmatrix mat[1];
+   GLfloat s[TEST_COUNT][5], d[TEST_COUNT][3], r[TEST_COUNT][3];
+   GLfloat d2[TEST_COUNT][3], r2[TEST_COUNT][3], length[TEST_COUNT];
+   GLfloat scale;
+   GLfloat *m;
+   GLubyte mask[TEST_COUNT];
+   int i, j, k;
+#ifdef RUN_XFORM_BENCHMARK
+   int cycle_i;                /* the counter for the benchmarks we run */
+#endif
+
+   (void) cycles;
+
+   mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
+   mat->inv = m = mat->m;      /* the same storage serves as the inverse */
+
+   m[0] = 63.0; m[4] = 43.0; m[ 8] = 29.0; m[12] = 43.0;
+   m[1] = 55.0; m[5] = 17.0; m[ 9] = 31.0; m[13] = 7.0;
+   m[2] = 44.0; m[6] = 9.0; m[10] = 7.0; m[14] = 3.0;
+   m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] = 9.0;
+
+   /* norm_scale_types[] is NIL (0) for the non-rescaling types, which
+    * leaves scale at exactly 1.
+    */
+   scale = 1.0F + rnd () * norm_scale_types[mtype];
+
+   /* Overwrite the entries the template pins to a constant; VAR entries
+    * keep the arbitrary values assigned above.
+    */
+   for ( i = 0 ; i < 4 ; i++ ) {
+      for ( j = 0 ; j < 4 ; j++ ) {
+         switch ( norm_templates[mtype][i * 4 + j] ) {
+         case NIL:
+            m[j * 4 + i] = 0.0;
+            break;
+         case ONE:
+            m[j * 4 + i] = 1.0;
+            break;
+         case NEG:
+            m[j * 4 + i] = -1.0;
+            break;
+         case VAR:
+            break;
+         default:
+            abort();
+         }
+      }
+   }
+
+   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
+      mask[i] = i % 2;                /* mask every 2nd element */
+      d[i][0] = s[i][0] = d2[i][0] = 0.0;
+      d[i][1] = s[i][1] = d2[i][1] = 0.0;
+      d[i][2] = s[i][2] = d2[i][2] = 0.0;
+      for ( j = 0 ; j < 3 ; j++ )
+         s[i][j] = rnd();
+      length[i] = 1 / sqrt( s[i][0]*s[i][0] +
+                            s[i][1]*s[i][1] +
+                            s[i][2]*s[i][2] );
+   }
+
+   source->data = (GLfloat(*)[3])s;
+   source->start = (GLfloat *)s;
+   source->count = TEST_COUNT;
+   source->stride = sizeof(s[0]);
+   source->flags = 0;
+
+   dest->data = (GLfloat(*)[3])d;
+   dest->start = (GLfloat *)d;
+   dest->count = TEST_COUNT;
+   dest->stride = sizeof(float[3]);
+   dest->flags = 0;
+
+   dest2->data = (GLfloat(*)[3])d2;
+   dest2->start = (GLfloat *)d2;
+   dest2->count = TEST_COUNT;
+   dest2->stride = sizeof(float[3]);
+   dest2->flags = 0;
+
+   ref->data = (GLfloat(*)[3])r;
+   ref->start = (GLfloat *)r;
+   ref->count = TEST_COUNT;
+   ref->stride = sizeof(float[3]);
+   ref->flags = 0;
+
+   ref2->data = (GLfloat(*)[3])r2;
+   ref2->start = (GLfloat *)r2;
+   ref2->count = TEST_COUNT;
+   ref2->stride = sizeof(float[3]);
+   ref2->flags = 0;
+
+   if ( norm_normalize_types[mtype] == 0 ) {
+      ref_norm_transform_rescale( mat, scale, source, NULL, NULL, ref );
+   } else {
+      ref_norm_transform_normalize( mat, scale, source, NULL, NULL, ref );
+      ref_norm_transform_normalize( mat, scale, source, length, NULL, ref2 );
+   }
+
+   /* Only the call without precalculated lengths is timed. */
+   if ( mesa_profile ) {
+      if ( masked ) {
+         BEGIN_RACE( *cycles );
+         func( mat, scale, source, NULL, mask, dest );
+         END_RACE( *cycles );
+         func( mat, scale, source, length, mask, dest2 );
+      } else {
+         BEGIN_RACE( *cycles );
+         func( mat, scale, source, NULL, NULL, dest );
+         END_RACE( *cycles );
+         func( mat, scale, source, length, NULL, dest2 );
+      }
+   } else {
+      if ( masked ) {
+         func( mat, scale, source, NULL, mask, dest );
+         func( mat, scale, source, length, mask, dest2 );
+      } else {
+         func( mat, scale, source, NULL, NULL, dest );
+         func( mat, scale, source, length, NULL, dest2 );
+      }
+   }
+
+   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
+      if ( masked && !(mask[i] & 1) )
+         continue;
+
+      for ( j = 0 ; j < 3 ; j++ ) {
+         if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
+            printf( "-----------------------------\n" );
+            printf( "(i = %i, j = %i)\n", i, j );
+            /* Dump all three components of the failing normal. */
+            for ( k = 0 ; k < 3 ; k++ ) {
+               printf( "%f \t %f \t [ratio = %e - %i bit missed]\n",
+                       d[i][k], r[i][k], r[i][k]/d[i][k],
+                       MAX_PRECISION - significand_match( d[i][k], r[i][k] ) );
+            }
+            /* Do not leak the matrix on the failure path. */
+            ALIGN_FREE( mat->m );
+            return 0;
+         }
+
+         if ( norm_normalize_types[mtype] != 0 ) {
+            if ( significand_match( d2[i][j], r2[i][j] ) < REQUIRED_PRECISION ) {
+               printf( "------------------- precalculated length case ------\n" );
+               printf( "(i = %i, j = %i)\n", i, j );
+               for ( k = 0 ; k < 3 ; k++ ) {
+                  printf( "%f \t %f \t [ratio = %e - %i bit missed]\n",
+                          d2[i][k], r2[i][k], r2[i][k]/d2[i][k],
+                          MAX_PRECISION - significand_match( d2[i][k], r2[i][k] ) );
+               }
+               /* Do not leak the matrix on the failure path. */
+               ALIGN_FREE( mat->m );
+               return 0;
+            }
+         }
+      }
+   }
+
+   ALIGN_FREE( mat->m );
+   return 1;
+}
+
+void gl_test_all_normal_transform_functions( char *description ) /* description: label echoed in the printed and reported messages */
+{
+ int masked;
+ int mtype;
+ long benchmark_tab[0xf][0x4]; /* cycle counts, indexed [mtype][cma]; the loops below use mtype 0..7 and cma 0..1 */
+ static int first_time = 1;
+
+ if ( first_time ) { /* read MESA_PROFILE only on the first call */
+ first_time = 0;
+ mesa_profile = getenv( "MESA_PROFILE" );
+ }
+
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile ) {
+ if ( need_counter ) { /* measure the counter overhead once */
+ need_counter = 0;
+ INIT_COUNTER();
+ printf( "counter overhead: %ld cycles\n\n", counter_overhead );
+ }
+ printf( "normal transform results after hooking in %s functions:\n",
+ description );
+ }
+#endif
+
+ for ( masked = 0 ; masked <= 1 ; masked++ ) {
+ int cma = masked ? 1 : 0; /* second index into gl_normal_tab and benchmark_tab */
+ char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
+
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile ) {
+ printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
+ printf( "\n-------------------------------------------------------\n" );
+ }
+#endif
+
+ for ( mtype = 0 ; mtype < 8 ; mtype++ ) { /* one pass per entry of norm_types[] */
+ normal_func func = gl_normal_tab[norm_types[mtype]][cma];
+ long *cycles = &(benchmark_tab[mtype][cma]);
+
+ if ( test_norm_function( func, mtype, masked, cycles ) == 0 ) { /* 0: the routine failed the comparison */
+ char buf[100];
+ sprintf( buf, "gl_normal_tab[%s][%s] failed test (%s)",
+ cmastring, norm_strings[mtype], description );
+ gl_problem( NULL, buf );
+ }
+
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile ) {
+ printf( " %li\t", benchmark_tab[mtype][cma] );
+ printf( " | [%s]\n", norm_strings[mtype] );
+ }
+ } /* closes the mtype loop in benchmark builds */
+ if ( mesa_profile )
+ printf( "\n" );
+#else
+ } /* closes the mtype loop in non-benchmark builds */
+#endif
+ }
+#ifdef RUN_XFORM_BENCHMARK
+ if ( mesa_profile )
+ fflush( stdout );
+#endif
+}
+
+#endif /* DEBUG */
diff --git a/src/mesa/math/m_dotprod_tmp.h b/src/mesa/math/m_dotprod_tmp.h
new file mode 100644
index 0000000000..637e35fd58
--- /dev/null
+++ b/src/mesa/math/m_dotprod_tmp.h
@@ -0,0 +1,128 @@
+/* $Id: m_dotprod_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/* Plane dot product for 2-component coordinates.
+ *
+ * For each of the coord_vec->count input elements this evaluates
+ * coord[0]*plane[0] + coord[1]*plane[1] + plane[3]
+ * and stores the result at out_vec->start[elt], stepping through the
+ * output with out_vec->stride. plane[2] is not read and plane[3] is
+ * added unscaled, i.e. the missing z is taken as 0 and w as 1.
+ *
+ * Note - respects the stride of the output vector.
+ *
+ * TAG, CULL_CHECK and STRIDE_F are supplied by the file that includes
+ * this template. NOTE(review): CULL_CHECK presumably tests mask[i] in the
+ * masked build and expands to nothing otherwise - confirm at the include
+ * site.
+ */
+static void TAG(dotprod_vec2)( GLvector4f *out_vec,
+ GLuint elt,
+ const GLvector4f *coord_vec,
+ const GLfloat plane[4],
+ const GLubyte mask[])
+{
+ GLuint stride = coord_vec->stride;
+ GLfloat *coord = coord_vec->start;
+ GLuint count = coord_vec->count;
+
+ GLuint outstride = out_vec->stride;
+ /* Start at component 'elt' of the first output element. */
+ GLfloat *out = out_vec->start + elt;
+ GLuint i;
+
+ const GLfloat plane0 = plane[0], plane1 = plane[1], plane3 = plane[3];
+
+ /* Keeps the parameter referenced even if CULL_CHECK does not use it. */
+ (void) mask;
+
+ for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(out,outstride)) {
+ CULL_CHECK {
+ *out = (coord[0] * plane0 +
+ coord[1] * plane1 +
+ plane3);
+ }
+ }
+ /* The output holds one result slot per input element. */
+ out_vec->count = coord_vec->count;
+}
+
+/* Plane dot product for 3-component coordinates.
+ *
+ * For each of the coord_vec->count input elements this evaluates
+ * coord[0]*plane[0] + coord[1]*plane[1] + coord[2]*plane[2] + plane[3]
+ * and stores the result at out_vec->start[elt], stepping through the
+ * output with out_vec->stride. plane[3] is added unscaled, i.e. the
+ * missing w is taken as 1.
+ *
+ * NOTE(review): CULL_CHECK comes from the including file and presumably
+ * tests mask[i] in the masked build - confirm at the include site.
+ */
+static void TAG(dotprod_vec3)( GLvector4f *out_vec,
+ GLuint elt,
+ const GLvector4f *coord_vec,
+ const GLfloat plane[4],
+ const GLubyte mask[])
+{
+ GLuint stride = coord_vec->stride;
+ GLfloat *coord = coord_vec->start;
+ GLuint count = coord_vec->count;
+
+ GLuint outstride = out_vec->stride;
+ /* Start at component 'elt' of the first output element. */
+ GLfloat *out = out_vec->start + elt;
+ GLuint i;
+
+ const GLfloat plane0 = plane[0], plane1 = plane[1], plane2 = plane[2];
+ const GLfloat plane3 = plane[3];
+
+ /* Keeps the parameter referenced even if CULL_CHECK does not use it. */
+ (void) mask;
+
+ for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(out,outstride)) {
+ CULL_CHECK {
+ *out = (coord[0] * plane0 +
+ coord[1] * plane1 +
+ coord[2] * plane2 +
+ plane3);
+ }
+ }
+ /* The output holds one result slot per input element. */
+ out_vec->count = coord_vec->count;
+}
+
+/* Plane dot product for full 4-component coordinates.
+ *
+ * For each of the coord_vec->count input elements this evaluates
+ * coord[0]*plane[0] + coord[1]*plane[1] + coord[2]*plane[2] +
+ * coord[3]*plane[3]
+ * and stores the result at out_vec->start[elt], stepping through the
+ * output with out_vec->stride.
+ *
+ * NOTE(review): CULL_CHECK comes from the including file and presumably
+ * tests mask[i] in the masked build - confirm at the include site.
+ */
+static void TAG(dotprod_vec4)( GLvector4f *out_vec,
+ GLuint elt,
+ const GLvector4f *coord_vec,
+ const GLfloat plane[4],
+ const GLubyte mask[])
+{
+ GLuint stride = coord_vec->stride;
+ GLfloat *coord = coord_vec->start;
+ GLuint count = coord_vec->count;
+
+ GLuint outstride = out_vec->stride;
+ /* Start at component 'elt' of the first output element. */
+ GLfloat *out = out_vec->start + elt;
+ GLuint i;
+
+ const GLfloat plane0 = plane[0], plane1 = plane[1], plane2 = plane[2];
+ const GLfloat plane3 = plane[3];
+
+ /* Keeps the parameter referenced even if CULL_CHECK does not use it. */
+ (void) mask;
+
+ for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(out,outstride)) {
+ CULL_CHECK {
+ *out = (coord[0] * plane0 +
+ coord[1] * plane1 +
+ coord[2] * plane2 +
+ coord[3] * plane3);
+ }
+ }
+ /* The output holds one result slot per input element. */
+ out_vec->count = coord_vec->count;
+}
+
+
+/* Register this instantiation's dot product routines in gl_dotprod_tab.
+ * The first index is the low bit of IDX, the second is the number of
+ * coordinate components (2, 3 or 4) the routine consumes.
+ */
+static void TAG(init_dotprod)( void )
+{
+   gl_dotprod_tab[IDX&1][4] = TAG(dotprod_vec4);
+   gl_dotprod_tab[IDX&1][3] = TAG(dotprod_vec3);
+   gl_dotprod_tab[IDX&1][2] = TAG(dotprod_vec2);
+}
diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
new file mode 100644
index 0000000000..ae55c946d9
--- /dev/null
+++ b/src/mesa/math/m_matrix.c
@@ -0,0 +1,1113 @@
+/* $Id: m_matrix.c,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Matrix operations
+ *
+ * NOTES:
+ * 1. 4x4 transformation matrices are stored in memory in column major order.
+ * 2. Points/vertices are to be thought of as column vectors.
+ * 3. Transformation of a point p by a matrix M is: p' = M * p
+ */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "mem.h"
+#include "mmath.h"
+
+#include "m_matrix.h"
+
+
+/* Printable names for the matrix type codes, used by _math_matrix_print()
+ * which indexes this table with GLmatrix::type. The entries must stay in
+ * the same order as the MATRIX_* constants in m_matrix.h
+ * (MATRIX_GENERAL == 0 ... MATRIX_3D == 6).
+ */
+static const char *types[] = {
+ "MATRIX_GENERAL",
+ "MATRIX_IDENTITY",
+ "MATRIX_3D_NO_ROT",
+ "MATRIX_PERSPECTIVE",
+ "MATRIX_2D",
+ "MATRIX_2D_NO_ROT",
+ "MATRIX_3D"
+};
+
+
+/* 4x4 identity matrix. It is only ever used as the source of a MEMCPY and
+ * as a sizeof operand in this file, so it is declared const to keep it out
+ * of writable data and to catch accidental modification at compile time.
+ */
+static const GLfloat Identity[16] = {
+   1.0, 0.0, 0.0, 0.0,
+   0.0, 1.0, 0.0, 0.0,
+   0.0, 0.0, 1.0, 0.0,
+   0.0, 0.0, 0.0, 1.0
+};
+
+
+
+
+/*
+ * This matmul was contributed by Thomas Malik
+ *
+ * Perform a 4x4 matrix multiplication (product = a x b).
+ * Input: a, b - matrices to multiply
+ * Output: product - product of a and b
+ * WARNING: (product != b) assumed
+ * NOTE: (product == a) allowed
+ *
+ * KW: 4*16 = 64 muls
+ */
+/* Column-major element accessors for matmul4()/matmul34(). They refer to
+ * the parameters named a, b and product of the enclosing function. The
+ * arguments are parenthesised so that expressions such as A(i+1,j) expand
+ * correctly.
+ */
+#define A(row,col) a[((col)<<2)+(row)]
+#define B(row,col) b[((col)<<2)+(row)]
+#define P(row,col) product[((col)<<2)+(row)]
+
+/* Full 4x4 product: product = a * b, all matrices column-major.
+ * Each row of a is read into locals before the matching row of product
+ * is written, which is what makes product == a safe; product must not
+ * alias b.
+ */
+static void matmul4( GLfloat *product, const GLfloat *a, const GLfloat *b )
+{
+   GLint row;
+   for (row = 0; row < 4; row++) {
+      const GLfloat a0 = A(row,0);
+      const GLfloat a1 = A(row,1);
+      const GLfloat a2 = A(row,2);
+      const GLfloat a3 = A(row,3);
+      GLint col;
+      for (col = 0; col < 4; col++) {
+         P(row,col) = a0 * B(0,col) + a1 * B(1,col) + a2 * B(2,col) + a3 * B(3,col);
+      }
+   }
+}
+
+
+/* Product of two matrices whose bottom row is known to be (0 0 0 1),
+ * such as typical model matrices and ortho matrices. Only the top three
+ * rows are computed; the bottom row of the result is written as
+ * (0 0 0 1) directly. As with matmul4, product may alias a but not b.
+ */
+static void matmul34( GLfloat *product, const GLfloat *a, const GLfloat *b )
+{
+   GLint row;
+   GLint col;
+
+   for (row = 0; row < 3; row++) {
+      const GLfloat a0 = A(row,0);
+      const GLfloat a1 = A(row,1);
+      const GLfloat a2 = A(row,2);
+      const GLfloat a3 = A(row,3);
+
+      for (col = 0; col < 3; col++) {
+         P(row,col) = a0 * B(0,col) + a1 * B(1,col) + a2 * B(2,col);
+      }
+      /* b's bottom-right element is taken to be 1, so a3 is added as is */
+      P(row,3) = a0 * B(0,3) + a1 * B(1,3) + a2 * B(2,3) + a3;
+   }
+
+   for (col = 0; col < 3; col++) {
+      P(3,col) = 0;
+   }
+   P(3,3) = 1;
+}
+
+
+#undef A
+#undef B
+#undef P
+
+
+/*
+ * Post-multiply mat in place by the 16 floats in m.
+ * 'flags' describes the known properties of m; it is merged into the
+ * matrix flags together with the dirty-type and dirty-inverse bits
+ * before the multiply routine is chosen.
+ */
+static void matrix_multf( GLmatrix *mat, const GLfloat *m, GLuint flags )
+{
+   GLfloat *dst = mat->m;
+
+   mat->flags |= (flags | MAT_DIRTY_TYPE | MAT_DIRTY_INVERSE);
+
+   if (!TEST_MAT_FLAGS(mat, MAT_FLAGS_3D)) {
+      matmul4( dst, dst, m );
+      return;
+   }
+
+   /* both operands carry 3D-only flags: use the cheaper 3x4 product */
+   matmul34( dst, dst, m );
+}
+
+
+/* Write the column-major matrix m to stderr, one matrix row per line. */
+static void print_matrix_floats( const GLfloat m[16] )
+{
+   int row = 0;
+   while (row < 4) {
+      fprintf(stderr,"\t%f %f %f %f\n", m[row], m[row+4], m[row+8], m[row+12] );
+      row++;
+   }
+}
+
+/* Debug dump of a matrix to stderr: its type name and flags, its
+ * elements and, when an inverse is allocated, the inverse together with
+ * the product matrix * inverse so the inverse can be checked by eye.
+ */
+void
+_math_matrix_print( const GLmatrix *m )
+{
+   GLfloat prod[16];
+
+   fprintf(stderr, "Matrix type: %s, flags: %x\n", types[m->type], m->flags);
+   print_matrix_floats(m->m);
+   fprintf(stderr, "Inverse: \n");
+
+   if (!m->inv) {
+      fprintf(stderr, " - not available\n");
+      return;
+   }
+
+   print_matrix_floats(m->inv);
+   matmul4(prod, m->m, m->inv);
+   fprintf(stderr, "Mat * Inverse:\n");
+   print_matrix_floats(prod);
+}
+
+
+
+
+#define SWAP_ROWS(a, b) { GLfloat *_tmp = a; (a)=(b); (b)=_tmp; }
+#define MAT(m,r,c) (m)[(c)*4+(r)]
+
+/*
+ * Compute inverse of 4x4 transformation matrix.
+ * Code contributed by Jacques Leroy jle@star.be
+ * Return GL_TRUE for success, GL_FALSE for failure (singular matrix)
+ *
+ * Reads mat->m and writes all 16 elements of mat->inv on success;
+ * mat->inv is left untouched when GL_FALSE is returned.
+ *
+ * Method: Gaussian elimination with partial pivoting on the augmented
+ * 4x8 matrix [ M | I ] held in wtmp, followed by back substitution.
+ * Rows are exchanged by swapping the row pointers r0..r3 rather than
+ * by moving data.
+ */
+static GLboolean invert_matrix_general( GLmatrix *mat )
+{
+ const GLfloat *m = mat->m;
+ GLfloat *out = mat->inv;
+ GLfloat wtmp[4][8];
+ GLfloat m0, m1, m2, m3, s;
+ GLfloat *r0, *r1, *r2, *r3;
+
+ r0 = wtmp[0], r1 = wtmp[1], r2 = wtmp[2], r3 = wtmp[3];
+
+ /* Build [ M | I ]: columns 0-3 of each row hold a row of M, columns
+ * 4-7 hold the matching row of the identity. This is one long comma
+ * expression ending at the semicolon after the r3 initialisation.
+ */
+ r0[0] = MAT(m,0,0), r0[1] = MAT(m,0,1),
+ r0[2] = MAT(m,0,2), r0[3] = MAT(m,0,3),
+ r0[4] = 1.0, r0[5] = r0[6] = r0[7] = 0.0,
+
+ r1[0] = MAT(m,1,0), r1[1] = MAT(m,1,1),
+ r1[2] = MAT(m,1,2), r1[3] = MAT(m,1,3),
+ r1[5] = 1.0, r1[4] = r1[6] = r1[7] = 0.0,
+
+ r2[0] = MAT(m,2,0), r2[1] = MAT(m,2,1),
+ r2[2] = MAT(m,2,2), r2[3] = MAT(m,2,3),
+ r2[6] = 1.0, r2[4] = r2[5] = r2[7] = 0.0,
+
+ r3[0] = MAT(m,3,0), r3[1] = MAT(m,3,1),
+ r3[2] = MAT(m,3,2), r3[3] = MAT(m,3,3),
+ r3[7] = 1.0, r3[4] = r3[5] = r3[6] = 0.0;
+
+ /* choose pivot - or die */
+ /* The three compare-and-swap steps bubble the row with the largest
+ * magnitude in column 0 up into r0.
+ */
+ if (fabs(r3[0])>fabs(r2[0])) SWAP_ROWS(r3, r2);
+ if (fabs(r2[0])>fabs(r1[0])) SWAP_ROWS(r2, r1);
+ if (fabs(r1[0])>fabs(r0[0])) SWAP_ROWS(r1, r0);
+ if (0.0 == r0[0]) return GL_FALSE;
+
+ /* eliminate first variable */
+ /* m1..m3 are the row multipliers. The identity half (columns 4-7) is
+ * only updated where the pivot row entry is non-zero.
+ */
+ m1 = r1[0]/r0[0]; m2 = r2[0]/r0[0]; m3 = r3[0]/r0[0];
+ s = r0[1]; r1[1] -= m1 * s; r2[1] -= m2 * s; r3[1] -= m3 * s;
+ s = r0[2]; r1[2] -= m1 * s; r2[2] -= m2 * s; r3[2] -= m3 * s;
+ s = r0[3]; r1[3] -= m1 * s; r2[3] -= m2 * s; r3[3] -= m3 * s;
+ s = r0[4];
+ if (s != 0.0) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
+ s = r0[5];
+ if (s != 0.0) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
+ s = r0[6];
+ if (s != 0.0) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
+ s = r0[7];
+ if (s != 0.0) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }
+
+ /* choose pivot - or die */
+ if (fabs(r3[1])>fabs(r2[1])) SWAP_ROWS(r3, r2);
+ if (fabs(r2[1])>fabs(r1[1])) SWAP_ROWS(r2, r1);
+ if (0.0 == r1[1]) return GL_FALSE;
+
+ /* eliminate second variable */
+ m2 = r2[1]/r1[1]; m3 = r3[1]/r1[1];
+ r2[2] -= m2 * r1[2]; r3[2] -= m3 * r1[2];
+ r2[3] -= m2 * r1[3]; r3[3] -= m3 * r1[3];
+ s = r1[4]; if (0.0 != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
+ s = r1[5]; if (0.0 != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
+ s = r1[6]; if (0.0 != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
+ s = r1[7]; if (0.0 != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }
+
+ /* choose pivot - or die */
+ if (fabs(r3[2])>fabs(r2[2])) SWAP_ROWS(r3, r2);
+ if (0.0 == r2[2]) return GL_FALSE;
+
+ /* eliminate third variable */
+ m3 = r3[2]/r2[2];
+ r3[3] -= m3 * r2[3], r3[4] -= m3 * r2[4],
+ r3[5] -= m3 * r2[5], r3[6] -= m3 * r2[6],
+ r3[7] -= m3 * r2[7];
+
+ /* last check */
+ if (0.0 == r3[3]) return GL_FALSE;
+
+ /* The left half is now upper triangular. Back substitution below
+ * turns the right half (columns 4-7) into the inverse, row by row
+ * from the bottom up.
+ */
+ s = 1.0/r3[3]; /* now back substitute row 3 */
+ r3[4] *= s; r3[5] *= s; r3[6] *= s; r3[7] *= s;
+
+ m2 = r2[3]; /* now back substitute row 2 */
+ s = 1.0/r2[2];
+ r2[4] = s * (r2[4] - r3[4] * m2), r2[5] = s * (r2[5] - r3[5] * m2),
+ r2[6] = s * (r2[6] - r3[6] * m2), r2[7] = s * (r2[7] - r3[7] * m2);
+ m1 = r1[3];
+ r1[4] -= r3[4] * m1, r1[5] -= r3[5] * m1,
+ r1[6] -= r3[6] * m1, r1[7] -= r3[7] * m1;
+ m0 = r0[3];
+ r0[4] -= r3[4] * m0, r0[5] -= r3[5] * m0,
+ r0[6] -= r3[6] * m0, r0[7] -= r3[7] * m0;
+
+ m1 = r1[2]; /* now back substitute row 1 */
+ s = 1.0/r1[1];
+ r1[4] = s * (r1[4] - r2[4] * m1), r1[5] = s * (r1[5] - r2[5] * m1),
+ r1[6] = s * (r1[6] - r2[6] * m1), r1[7] = s * (r1[7] - r2[7] * m1);
+ m0 = r0[2];
+ r0[4] -= r2[4] * m0, r0[5] -= r2[5] * m0,
+ r0[6] -= r2[6] * m0, r0[7] -= r2[7] * m0;
+
+ m0 = r0[1]; /* now back substitute row 0 */
+ s = 1.0/r0[0];
+ r0[4] = s * (r0[4] - r1[4] * m0), r0[5] = s * (r0[5] - r1[5] * m0),
+ r0[6] = s * (r0[6] - r1[6] * m0), r0[7] = s * (r0[7] - r1[7] * m0);
+
+ /* Copy the right half of the augmented matrix into the output. The
+ * mix of ';' and ',' separators is harmless: every assignment runs.
+ */
+ MAT(out,0,0) = r0[4]; MAT(out,0,1) = r0[5],
+ MAT(out,0,2) = r0[6]; MAT(out,0,3) = r0[7],
+ MAT(out,1,0) = r1[4]; MAT(out,1,1) = r1[5],
+ MAT(out,1,2) = r1[6]; MAT(out,1,3) = r1[7],
+ MAT(out,2,0) = r2[4]; MAT(out,2,1) = r2[5],
+ MAT(out,2,2) = r2[6]; MAT(out,2,3) = r2[7],
+ MAT(out,3,0) = r3[4]; MAT(out,3,1) = r3[5],
+ MAT(out,3,2) = r3[6]; MAT(out,3,3) = r3[7];
+
+ return GL_TRUE;
+}
+#undef SWAP_ROWS
+
+
+/* Adapted from graphics gems II.
+ *
+ * Invert a matrix using only its upper-left 3x3 block and its fourth
+ * (translation) column. Returns GL_FALSE without touching mat->inv when
+ * the squared 3x3 determinant is below 1e-25, GL_TRUE otherwise.
+ *
+ * Only rows 0-2 of mat->inv are written; the bottom row is left as it
+ * was. NOTE(review): this presumably relies on mat->inv already holding
+ * (0 0 0 1) there - confirm against the callers.
+ */
+static GLboolean invert_matrix_3d_general( GLmatrix *mat )
+{
+ const GLfloat *in = mat->m;
+ GLfloat *out = mat->inv;
+ GLfloat pos, neg, t;
+ GLfloat det;
+
+ /* Calculate the determinant of upper left 3x3 submatrix and
+ * determine if the matrix is singular.
+ */
+ /* The six product terms are accumulated separately by sign (pos and
+ * neg) and only combined at the end.
+ */
+ pos = neg = 0.0;
+ t = MAT(in,0,0) * MAT(in,1,1) * MAT(in,2,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ t = MAT(in,1,0) * MAT(in,2,1) * MAT(in,0,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ t = MAT(in,2,0) * MAT(in,0,1) * MAT(in,1,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ t = -MAT(in,2,0) * MAT(in,1,1) * MAT(in,0,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ t = -MAT(in,1,0) * MAT(in,0,1) * MAT(in,2,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ t = -MAT(in,0,0) * MAT(in,2,1) * MAT(in,1,2);
+ if (t >= 0.0) pos += t; else neg += t;
+
+ det = pos + neg;
+
+ /* Comparing the square avoids needing the absolute value of det. */
+ if (det*det < 1e-25)
+ return GL_FALSE;
+
+ /* Inverse of the 3x3 block: each 2x2 cofactor scaled by 1/det. */
+ det = 1.0 / det;
+ MAT(out,0,0) = ( (MAT(in,1,1)*MAT(in,2,2) - MAT(in,2,1)*MAT(in,1,2) )*det);
+ MAT(out,0,1) = (- (MAT(in,0,1)*MAT(in,2,2) - MAT(in,2,1)*MAT(in,0,2) )*det);
+ MAT(out,0,2) = ( (MAT(in,0,1)*MAT(in,1,2) - MAT(in,1,1)*MAT(in,0,2) )*det);
+ MAT(out,1,0) = (- (MAT(in,1,0)*MAT(in,2,2) - MAT(in,2,0)*MAT(in,1,2) )*det);
+ MAT(out,1,1) = ( (MAT(in,0,0)*MAT(in,2,2) - MAT(in,2,0)*MAT(in,0,2) )*det);
+ MAT(out,1,2) = (- (MAT(in,0,0)*MAT(in,1,2) - MAT(in,1,0)*MAT(in,0,2) )*det);
+ MAT(out,2,0) = ( (MAT(in,1,0)*MAT(in,2,1) - MAT(in,2,0)*MAT(in,1,1) )*det);
+ MAT(out,2,1) = (- (MAT(in,0,0)*MAT(in,2,1) - MAT(in,2,0)*MAT(in,0,1) )*det);
+ MAT(out,2,2) = ( (MAT(in,0,0)*MAT(in,1,1) - MAT(in,1,0)*MAT(in,0,1) )*det);
+
+ /* Do the translation part */
+ /* New translation = -(inverted 3x3 block) * (original translation). */
+ MAT(out,0,3) = - (MAT(in,0,3) * MAT(out,0,0) +
+ MAT(in,1,3) * MAT(out,0,1) +
+ MAT(in,2,3) * MAT(out,0,2) );
+ MAT(out,1,3) = - (MAT(in,0,3) * MAT(out,1,0) +
+ MAT(in,1,3) * MAT(out,1,1) +
+ MAT(in,2,3) * MAT(out,1,2) );
+ MAT(out,2,3) = - (MAT(in,0,3) * MAT(out,2,0) +
+ MAT(in,1,3) * MAT(out,2,1) +
+ MAT(in,2,3) * MAT(out,2,2) );
+
+ return GL_TRUE;
+}
+
+
+/* Invert a 3D matrix, using the matrix flags to pick the cheapest method:
+ * - flags not limited to MAT_FLAGS_ANGLE_PRESERVING: defer to
+ * invert_matrix_3d_general();
+ * - MAT_FLAG_UNIFORM_SCALE set: transpose the 3x3 block and divide it by
+ * the squared length of its first row;
+ * - MAT_FLAG_ROTATION set: transpose the 3x3 block;
+ * - otherwise: pure translation, negate the translation column.
+ * Returns GL_FALSE only when the computed uniform scale is zero (or when
+ * the general routine fails).
+ *
+ * Except in the pure-translation case the bottom row of mat->inv is not
+ * written here.
+ */
+static GLboolean invert_matrix_3d( GLmatrix *mat )
+{
+ const GLfloat *in = mat->m;
+ GLfloat *out = mat->inv;
+
+ if (!TEST_MAT_FLAGS(mat, MAT_FLAGS_ANGLE_PRESERVING)) {
+ return invert_matrix_3d_general( mat );
+ }
+
+ if (mat->flags & MAT_FLAG_UNIFORM_SCALE) {
+ /* Squared length of the first row of the 3x3 block. */
+ GLfloat scale = (MAT(in,0,0) * MAT(in,0,0) +
+ MAT(in,0,1) * MAT(in,0,1) +
+ MAT(in,0,2) * MAT(in,0,2));
+
+ if (scale == 0.0)
+ return GL_FALSE;
+
+ scale = 1.0 / scale;
+
+ /* Transpose and scale the 3 by 3 upper-left submatrix. */
+ MAT(out,0,0) = scale * MAT(in,0,0);
+ MAT(out,1,0) = scale * MAT(in,0,1);
+ MAT(out,2,0) = scale * MAT(in,0,2);
+ MAT(out,0,1) = scale * MAT(in,1,0);
+ MAT(out,1,1) = scale * MAT(in,1,1);
+ MAT(out,2,1) = scale * MAT(in,1,2);
+ MAT(out,0,2) = scale * MAT(in,2,0);
+ MAT(out,1,2) = scale * MAT(in,2,1);
+ MAT(out,2,2) = scale * MAT(in,2,2);
+ }
+ else if (mat->flags & MAT_FLAG_ROTATION) {
+ /* Transpose the 3 by 3 upper-left submatrix. */
+ MAT(out,0,0) = MAT(in,0,0);
+ MAT(out,1,0) = MAT(in,0,1);
+ MAT(out,2,0) = MAT(in,0,2);
+ MAT(out,0,1) = MAT(in,1,0);
+ MAT(out,1,1) = MAT(in,1,1);
+ MAT(out,2,1) = MAT(in,1,2);
+ MAT(out,0,2) = MAT(in,2,0);
+ MAT(out,1,2) = MAT(in,2,1);
+ MAT(out,2,2) = MAT(in,2,2);
+ }
+ else {
+ /* pure translation */
+ MEMCPY( out, Identity, sizeof(Identity) );
+ MAT(out,0,3) = - MAT(in,0,3);
+ MAT(out,1,3) = - MAT(in,1,3);
+ MAT(out,2,3) = - MAT(in,2,3);
+ return GL_TRUE;
+ }
+
+ if (mat->flags & MAT_FLAG_TRANSLATION) {
+ /* Do the translation part */
+ /* New translation = -(inverted 3x3 block) * (original translation). */
+ MAT(out,0,3) = - (MAT(in,0,3) * MAT(out,0,0) +
+ MAT(in,1,3) * MAT(out,0,1) +
+ MAT(in,2,3) * MAT(out,0,2) );
+ MAT(out,1,3) = - (MAT(in,0,3) * MAT(out,1,0) +
+ MAT(in,1,3) * MAT(out,1,1) +
+ MAT(in,2,3) * MAT(out,1,2) );
+ MAT(out,2,3) = - (MAT(in,0,3) * MAT(out,2,0) +
+ MAT(in,1,3) * MAT(out,2,1) +
+ MAT(in,2,3) * MAT(out,2,2) );
+ }
+ else {
+ MAT(out,0,3) = MAT(out,1,3) = MAT(out,2,3) = 0.0;
+ }
+
+ return GL_TRUE;
+}
+
+
+
+/* The inverse of the identity is the identity: overwrite mat->inv with it.
+ * Always succeeds.
+ */
+static GLboolean invert_matrix_identity( GLmatrix *mat )
+{
+   GLfloat *inverse = mat->inv;
+
+   MEMCPY( inverse, Identity, sizeof(Identity) );
+   return GL_TRUE;
+}
+
+
+/* Invert a matrix made only of a per-axis scale (elements 0, 5, 10) and
+ * an optional translation (elements 12, 13, 14). Fails with GL_FALSE,
+ * leaving mat->inv untouched, when any of the three scale factors is
+ * zero.
+ */
+static GLboolean invert_matrix_3d_no_rot( GLmatrix *mat )
+{
+   const GLfloat *src = mat->m;
+   GLfloat *dst = mat->inv;
+   GLuint axis;
+
+   if (!(src[0] != 0 && src[5] != 0 && src[10] != 0))
+      return GL_FALSE;
+
+   MEMCPY( dst, Identity, 16 * sizeof(GLfloat) );
+
+   /* reciprocal of each diagonal scale factor */
+   for (axis = 0; axis < 3; axis++) {
+      dst[axis * 5] = 1.0 / src[axis * 5];
+   }
+
+   if (mat->flags & MAT_FLAG_TRANSLATION) {
+      /* inverse translation = -(translation * inverse scale), per axis */
+      for (axis = 0; axis < 3; axis++) {
+         dst[12 + axis] = - (src[12 + axis] * dst[axis * 5]);
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Invert a matrix made only of an x/y scale (elements 0 and 5) and an
+ * optional x/y translation (elements 12 and 13). Fails with GL_FALSE,
+ * leaving mat->inv untouched, when either scale factor is zero.
+ */
+static GLboolean invert_matrix_2d_no_rot( GLmatrix *mat )
+{
+   const GLfloat *src = mat->m;
+   GLfloat *dst = mat->inv;
+   GLuint axis;
+
+   if (!(src[0] != 0 && src[5] != 0))
+      return GL_FALSE;
+
+   MEMCPY( dst, Identity, 16 * sizeof(GLfloat) );
+
+   /* reciprocal of each diagonal scale factor */
+   for (axis = 0; axis < 2; axis++) {
+      dst[axis * 5] = 1.0 / src[axis * 5];
+   }
+
+   if (mat->flags & MAT_FLAG_TRANSLATION) {
+      /* inverse translation = -(translation * inverse scale), per axis */
+      for (axis = 0; axis < 2; axis++) {
+         dst[12 + axis] = - (src[12 + axis] * dst[axis * 5]);
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Invert a perspective projection matrix of the form produced by
+ * _math_matrix_frustrum():
+ *
+ *      | x  0  a  0 |                 | 1/x  0    0    a/x |
+ *  M = | 0  y  b  0 |      inv(M) =   | 0    1/y  0    b/y |
+ *      | 0  0  c  d |                 | 0    0    0    -1  |
+ *      | 0  0 -1  0 |                 | 0    0    1/d  c/d |
+ *
+ * Returns GL_FALSE, leaving mat->inv untouched, when x, y or d is zero
+ * (the matrix is singular in each of those cases); GL_TRUE otherwise.
+ *
+ * The fourth-column entries a/x and b/y must carry the 1/x and 1/y
+ * factors: row 0 of M times column 3 of inv(M) is x*(a/x) - a = 0.
+ */
+static GLboolean invert_matrix_perspective( GLmatrix *mat )
+{
+   const GLfloat *in = mat->m;
+   GLfloat *out = mat->inv;
+
+   /* every one of these is used as a divisor below */
+   if (MAT(in,0,0) == 0 || MAT(in,1,1) == 0 || MAT(in,2,3) == 0)
+      return GL_FALSE;
+
+   MEMCPY( out, Identity, 16 * sizeof(GLfloat) );
+
+   MAT(out,0,0) = 1.0 / MAT(in,0,0);
+   MAT(out,1,1) = 1.0 / MAT(in,1,1);
+
+   MAT(out,0,3) = MAT(in,0,2) * MAT(out,0,0);
+   MAT(out,1,3) = MAT(in,1,2) * MAT(out,1,1);
+
+   MAT(out,2,2) = 0;
+   MAT(out,2,3) = -1;
+
+   MAT(out,3,2) = 1.0 / MAT(in,2,3);
+   MAT(out,3,3) = MAT(in,2,2) * MAT(out,3,2);
+
+   return GL_TRUE;
+}
+
+
+/* An inversion routine: reads mat->m, writes mat->inv, and returns
+ * GL_FALSE when the matrix cannot be inverted.
+ */
+typedef GLboolean (*inv_mat_func)( GLmatrix *mat );
+
+
+/* Inversion routine for each matrix type. matrix_invert() indexes this
+ * table with GLmatrix::type, so the entries must follow the numbering of
+ * the MATRIX_* constants in m_matrix.h (MATRIX_GENERAL == 0 ...
+ * MATRIX_3D == 6).
+ */
+static inv_mat_func inv_mat_tab[7] = {
+ invert_matrix_general,
+ invert_matrix_identity,
+ invert_matrix_3d_no_rot,
+ invert_matrix_perspective,
+ invert_matrix_3d, /* lazy! MATRIX_2D reuses the 3D routine */
+ invert_matrix_2d_no_rot,
+ invert_matrix_3d
+};
+
+
+/* Recompute mat->inv with the routine registered for mat->type.
+ * On failure the inverse is reset to the identity and MAT_FLAG_SINGULAR
+ * is set; on success that flag is cleared. Returns the success status.
+ */
+static GLboolean matrix_invert( GLmatrix *mat )
+{
+   const inv_mat_func invert = inv_mat_tab[mat->type];
+
+   if (!invert( mat )) {
+      mat->flags |= MAT_FLAG_SINGULAR;
+      MEMCPY( mat->inv, Identity, sizeof(Identity) );
+      return GL_FALSE;
+   }
+
+   mat->flags &= ~MAT_FLAG_SINGULAR;
+   return GL_TRUE;
+}
+
+
+
+
+
+
+/*
+ * Generate a 4x4 transformation matrix from glRotate parameters, and
+ * postmultiply the input matrix by it.
+ *
+ * mat     - matrix to rotate in place
+ * angle   - rotation angle in degrees
+ * x, y, z - rotation axis; it is normalised here
+ *
+ * If the axis length is 1.0e-4 or less the rotation is undefined and the
+ * matrix (including its flags) is left untouched.
+ */
+void
+_math_matrix_rotate( GLmatrix *mat,
+                     GLfloat angle, GLfloat x, GLfloat y, GLfloat z )
+{
+   /* This function contributed by Erich Boleyn (erich@uruk.org) */
+   GLfloat mag, s, c;
+   GLfloat xx, yy, zz, xy, yz, zx, xs, ys, zs, one_c;
+   GLfloat m[16];
+
+   mag = GL_SQRT( x*x + y*y + z*z );
+
+   if (mag <= 1.0e-4) {
+      /* Degenerate axis: equivalent to multiplying by the identity, so
+       * there is nothing to do.
+       */
+      return;
+   }
+
+   s = sin( angle * DEG2RAD );
+   c = cos( angle * DEG2RAD );
+
+   x /= mag;
+   y /= mag;
+   z /= mag;
+
+#define M(row,col) m[col*4+row]
+
+   /*
+    * Arbitrary axis rotation matrix.
+    *
+    * This is composed of 5 matrices, Rz, Ry, T, Ry', Rz', multiplied
+    * like so: Rz * Ry * T * Ry' * Rz'. T is the final rotation
+    * (which is about the X-axis), and the two composite transforms
+    * Ry' * Rz' and Rz * Ry are (respectively) the rotations necessary
+    * from the arbitrary axis to the X-axis then back. They are
+    * all elementary rotations.
+    *
+    * Rz' is a rotation about the Z-axis, to bring the axis vector
+    * into the x-z plane. Then Ry' is applied, rotating about the
+    * Y-axis to bring the axis vector parallel with the X-axis. The
+    * rotation about the X-axis is then performed. Ry and Rz are
+    * simply the respective inverse transforms to bring the arbitrary
+    * axis back to its original orientation. The first transforms
+    * Rz' and Ry' are considered inverses, since the data from the
+    * arbitrary axis gives you info on how to get to it, not how
+    * to get away from it, and an inverse must be applied.
+    *
+    * The basic calculation used is to recognize that the arbitrary
+    * axis vector (x, y, z), since it is of unit length, actually
+    * represents the sines and cosines of the angles to rotate the
+    * X-axis to the same orientation, with theta being the angle about
+    * Z and phi the angle about Y (in the order described above)
+    * as follows:
+    *
+    * cos ( theta ) = x / sqrt ( 1 - z^2 )
+    * sin ( theta ) = y / sqrt ( 1 - z^2 )
+    *
+    * cos ( phi ) = sqrt ( 1 - z^2 )
+    * sin ( phi ) = z
+    *
+    * Note that cos ( phi ) can further be inserted to the above
+    * formulas:
+    *
+    * cos ( theta ) = x / cos ( phi )
+    * sin ( theta ) = y / cos ( phi )
+    *
+    * ...etc. Because of those relations and the standard trigonometric
+    * relations, it is possible to reduce the transforms down to what
+    * is used below. It may be that any primary axis chosen will give the
+    * same results (modulo a sign convention) using this method.
+    *
+    * Particularly nice is to notice that all divisions that might
+    * have caused trouble when parallel to certain planes or
+    * axis go away with care paid to reducing the expressions.
+    * After checking, it does perform correctly under all cases, since
+    * in all the cases of division where the denominator would have
+    * been zero, the numerator would have been zero as well, giving
+    * the expected result.
+    */
+
+   xx = x * x;
+   yy = y * y;
+   zz = z * z;
+   xy = x * y;
+   yz = y * z;
+   zx = z * x;
+   xs = x * s;
+   ys = y * s;
+   zs = z * s;
+   one_c = 1.0F - c;
+
+   M(0,0) = (one_c * xx) + c;
+   M(0,1) = (one_c * xy) - zs;
+   M(0,2) = (one_c * zx) + ys;
+   M(0,3) = 0.0F;
+
+   M(1,0) = (one_c * xy) + zs;
+   M(1,1) = (one_c * yy) + c;
+   M(1,2) = (one_c * yz) - xs;
+   M(1,3) = 0.0F;
+
+   M(2,0) = (one_c * zx) - ys;
+   M(2,1) = (one_c * yz) + xs;
+   M(2,2) = (one_c * zz) + c;
+   M(2,3) = 0.0F;
+
+   M(3,0) = 0.0F;
+   M(3,1) = 0.0F;
+   M(3,2) = 0.0F;
+   M(3,3) = 1.0F;
+
+#undef M
+
+   matrix_multf( mat, m, MAT_FLAG_ROTATION );
+}
+
+
+/* Post-multiply mat by the perspective projection matrix described by
+ * the glFrustum-style clip planes. No argument validation is done here:
+ * equal left/right, top/bottom or near/far values lead to a division by
+ * zero.
+ */
+void
+_math_matrix_frustrum( GLmatrix *mat,
+                       GLfloat left, GLfloat right,
+                       GLfloat bottom, GLfloat top,
+                       GLfloat nearval, GLfloat farval )
+{
+   GLfloat m[16];
+   GLuint k;
+
+   /* start from all zeros, then fill in the non-zero entries
+    * (column-major: index = column*4 + row)
+    */
+   for (k = 0; k < 16; k++) {
+      m[k] = 0.0F;
+   }
+
+   m[0]  = (2.0*nearval) / (right-left);             /* row 0, col 0 */
+   m[5]  = (2.0*nearval) / (top-bottom);             /* row 1, col 1 */
+   m[8]  = (right+left) / (right-left);              /* row 0, col 2 */
+   m[9]  = (top+bottom) / (top-bottom);              /* row 1, col 2 */
+   m[10] = -(farval+nearval) / ( farval-nearval);    /* row 2, col 2 */
+   m[11] = -1.0F;                                    /* row 3, col 2 */
+   m[14] = -(2.0*farval*nearval) / (farval-nearval); /* row 2, col 3 */
+
+   matrix_multf( mat, m, MAT_FLAG_PERSPECTIVE );
+}
+
+/* Post-multiply mat by the orthographic projection matrix described by
+ * the glOrtho-style clip planes. No argument validation is done here:
+ * equal left/right, top/bottom or near/far values lead to a division by
+ * zero.
+ */
+void
+_math_matrix_ortho( GLmatrix *mat,
+                    GLfloat left, GLfloat right,
+                    GLfloat bottom, GLfloat top,
+                    GLfloat nearval, GLfloat farval )
+{
+   GLfloat m[16];
+   GLuint k;
+
+   /* start from all zeros, then fill in the non-zero entries
+    * (column-major: index = column*4 + row)
+    */
+   for (k = 0; k < 16; k++) {
+      m[k] = 0.0F;
+   }
+
+   /* scale on the diagonal */
+   m[0]  = 2.0 / (right-left);
+   m[5]  = 2.0 / (top-bottom);
+   m[10] = -2.0 / (farval-nearval);
+
+   /* translation in the fourth column */
+   m[12] = -(right+left) / (right-left);
+   m[13] = -(top+bottom) / (top-bottom);
+   m[14] = -(farval+nearval) / (farval-nearval);
+   m[15] = 1.0F;
+
+   matrix_multf( mat, m, (MAT_FLAG_GENERAL_SCALE|MAT_FLAG_TRANSLATION));
+}
+
+
+/* Bit masks used by analyze_from_scratch(): ZERO(i) is the bit meaning
+ * "element i is 0", ONE(i) the bit meaning "element i is 1". The
+ * constant is unsigned because ONE(15) is bit 31, which cannot be
+ * produced by shifting a signed int without overflow.
+ */
+#define ZERO(x) (1u<<(x))
+#define ONE(x) (1u<<((x)+16))
+
+#define MASK_NO_TRX (ZERO(12) | ZERO(13) | ZERO(14))
+#define MASK_NO_2D_SCALE ( ONE(0) | ONE(5))
+
+#define MASK_IDENTITY ( ONE(0) | ZERO(4) | ZERO(8) | ZERO(12) |\
+ ZERO(1) | ONE(5) | ZERO(9) | ZERO(13) |\
+ ZERO(2) | ZERO(6) | ONE(10) | ZERO(14) |\
+ ZERO(3) | ZERO(7) | ZERO(11) | ONE(15) )
+
+#define MASK_2D_NO_ROT ( ZERO(4) | ZERO(8) | \
+ ZERO(1) | ZERO(9) | \
+ ZERO(2) | ZERO(6) | ONE(10) | ZERO(14) |\
+ ZERO(3) | ZERO(7) | ZERO(11) | ONE(15) )
+
+#define MASK_2D ( ZERO(8) | \
+ ZERO(9) | \
+ ZERO(2) | ZERO(6) | ONE(10) | ZERO(14) |\
+ ZERO(3) | ZERO(7) | ZERO(11) | ONE(15) )
+
+
+#define MASK_3D_NO_ROT ( ZERO(4) | ZERO(8) | \
+ ZERO(1) | ZERO(9) | \
+ ZERO(2) | ZERO(6) | \
+ ZERO(3) | ZERO(7) | ZERO(11) | ONE(15) )
+
+#define MASK_3D ( \
+ \
+ \
+ ZERO(3) | ZERO(7) | ZERO(11) | ONE(15) )
+
+
+#define MASK_PERSPECTIVE ( ZERO(4) | ZERO(12) |\
+ ZERO(1) | ZERO(13) |\
+ ZERO(2) | ZERO(6) | \
+ ZERO(3) | ZERO(7) | ZERO(15) )
+
+#define SQ(x) ((x)*(x))
+
+/* Determine type and flags from scratch. This is expensive enough to
+ * only want to do it once.
+ *
+ * A 32-bit mask is built from the matrix elements (bits 0-15: element i
+ * is exactly 0; bits 16, 21, 26, 31: diagonal element is exactly 1) and
+ * compared against the MASK_* patterns, from the most to the least
+ * specific. The geometry bits of mat->flags are rebuilt; all other flag
+ * bits are preserved.
+ */
+static void analyze_from_scratch( GLmatrix *mat )
+{
+   const GLfloat *m = mat->m;
+   GLuint mask = 0;
+   GLuint i;
+
+   for (i = 0 ; i < 16 ; i++) {
+      if (m[i] == 0.0) mask |= (1u<<i);
+   }
+
+   /* unsigned constants: 1<<31 would overflow a signed int */
+   if (m[0] == 1.0F) mask |= (1u<<16);
+   if (m[5] == 1.0F) mask |= (1u<<21);
+   if (m[10] == 1.0F) mask |= (1u<<26);
+   if (m[15] == 1.0F) mask |= (1u<<31);
+
+   mat->flags &= ~MAT_FLAGS_GEOMETRY;
+
+   /* Check for translation - no-one really cares
+    */
+   if ((mask & MASK_NO_TRX) != MASK_NO_TRX)
+      mat->flags |= MAT_FLAG_TRANSLATION;
+
+   /* Do the real work
+    */
+   if (mask == MASK_IDENTITY) {
+      mat->type = MATRIX_IDENTITY;
+   }
+   else if ((mask & MASK_2D_NO_ROT) == MASK_2D_NO_ROT) {
+      mat->type = MATRIX_2D_NO_ROT;
+
+      /* OR the flag in: a plain assignment here would discard the
+       * translation flag set above (which invert_matrix_2d_no_rot
+       * relies on) as well as every non-geometry flag bit.
+       */
+      if ((mask & MASK_NO_2D_SCALE) != MASK_NO_2D_SCALE)
+         mat->flags |= MAT_FLAG_GENERAL_SCALE;
+   }
+   else if ((mask & MASK_2D) == MASK_2D) {
+      GLfloat mm = DOT2(m, m);
+      GLfloat m4m4 = DOT2(m+4,m+4);
+      GLfloat mm4 = DOT2(m,m+4);
+
+      mat->type = MATRIX_2D;
+
+      /* Check for scale */
+      if (SQ(mm-1) > SQ(1e-6) ||
+          SQ(m4m4-1) > SQ(1e-6))
+         mat->flags |= MAT_FLAG_GENERAL_SCALE;
+
+      /* Check for rotation */
+      if (SQ(mm4) > SQ(1e-6))
+         mat->flags |= MAT_FLAG_GENERAL_3D;
+      else
+         mat->flags |= MAT_FLAG_ROTATION;
+
+   }
+   else if ((mask & MASK_3D_NO_ROT) == MASK_3D_NO_ROT) {
+      mat->type = MATRIX_3D_NO_ROT;
+
+      /* Check for scale */
+      if (SQ(m[0]-m[5]) < SQ(1e-6) &&
+          SQ(m[0]-m[10]) < SQ(1e-6)) {
+         if (SQ(m[0]-1.0) > SQ(1e-6)) {
+            mat->flags |= MAT_FLAG_UNIFORM_SCALE;
+         }
+      }
+      else {
+         mat->flags |= MAT_FLAG_GENERAL_SCALE;
+      }
+   }
+   else if ((mask & MASK_3D) == MASK_3D) {
+      /* squared lengths of the three columns, and the dot product of
+       * the first two
+       */
+      GLfloat c1 = DOT3(m,m);
+      GLfloat c2 = DOT3(m+4,m+4);
+      GLfloat c3 = DOT3(m+8,m+8);
+      GLfloat d1 = DOT3(m, m+4);
+      GLfloat cp[3];
+
+      mat->type = MATRIX_3D;
+
+      /* Check for scale */
+      if (SQ(c1-c2) < SQ(1e-6) && SQ(c1-c3) < SQ(1e-6)) {
+         if (SQ(c1-1.0) > SQ(1e-6))
+            mat->flags |= MAT_FLAG_UNIFORM_SCALE;
+         /* else no scale at all */
+      }
+      else {
+         mat->flags |= MAT_FLAG_GENERAL_SCALE;
+      }
+
+      /* Check for rotation */
+      if (SQ(d1) < SQ(1e-6)) {
+         /* first two columns are orthogonal: it is a rotation if the
+          * third column equals their cross product
+          */
+         CROSS3( cp, m, m+4 );
+         SUB_3V( cp, cp, (m+8) );
+         if (LEN_SQUARED_3FV(cp) < SQ(1e-6))
+            mat->flags |= MAT_FLAG_ROTATION;
+         else
+            mat->flags |= MAT_FLAG_GENERAL_3D;
+      }
+      else {
+         mat->flags |= MAT_FLAG_GENERAL_3D; /* shear, etc */
+      }
+   }
+   else if ((mask & MASK_PERSPECTIVE) == MASK_PERSPECTIVE && m[11]==-1.0F) {
+      mat->type = MATRIX_PERSPECTIVE;
+      mat->flags |= MAT_FLAG_GENERAL;
+   }
+   else {
+      mat->type = MATRIX_GENERAL;
+      mat->flags |= MAT_FLAG_GENERAL;
+   }
+}
+
+
+/* Derive mat->type from flags that are already known to be accurate -
+ * hopefully the common case. Only a handful of matrix elements are
+ * inspected, to tell the 2D variants from the 3D ones and to recognise
+ * the perspective layout. mat->flags is not modified.
+ */
+static void analyze_from_flags( GLmatrix *mat )
+{
+   const GLfloat *m = mat->m;
+
+   /* no geometry flags at all */
+   if (TEST_MAT_FLAGS(mat, 0)) {
+      mat->type = MATRIX_IDENTITY;
+      return;
+   }
+
+   /* nothing beyond translation and scale */
+   if (TEST_MAT_FLAGS(mat, (MAT_FLAG_TRANSLATION |
+                            MAT_FLAG_UNIFORM_SCALE |
+                            MAT_FLAG_GENERAL_SCALE))) {
+      mat->type = ( m[10]==1.0F && m[14]==0.0F ) ? MATRIX_2D_NO_ROT
+                                                 : MATRIX_3D_NO_ROT;
+      return;
+   }
+
+   /* nothing beyond the 3D flags */
+   if (TEST_MAT_FLAGS(mat, MAT_FLAGS_3D)) {
+      /* z is untouched and never mixes with x or y */
+      if ( m[ 8]==0.0F
+           && m[ 9]==0.0F
+           && m[2]==0.0F && m[6]==0.0F && m[10]==1.0F && m[14]==0.0F) {
+         mat->type = MATRIX_2D;
+      }
+      else {
+         mat->type = MATRIX_3D;
+      }
+      return;
+   }
+
+   /* zero pattern of a perspective projection with -1 at element 11 */
+   if ( m[4]==0.0F && m[12]==0.0F
+        && m[1]==0.0F && m[13]==0.0F
+        && m[2]==0.0F && m[6]==0.0F
+        && m[3]==0.0F && m[7]==0.0F && m[11]==-1.0F && m[15]==0.0F) {
+      mat->type = MATRIX_PERSPECTIVE;
+      return;
+   }
+
+   mat->type = MATRIX_GENERAL;
+}
+
+
+/* Bring the cached classification of a matrix up to date: recompute the
+ * type (and, if the flags themselves are dirty, the flags), refresh the
+ * inverse when one is allocated and marked dirty, then clear all three
+ * dirty bits.
+ */
+void
+_math_matrix_analyze( GLmatrix *mat )
+{
+   if (mat->flags & MAT_DIRTY_TYPE) {
+      void (*analyze)( GLmatrix * ) =
+         (mat->flags & MAT_DIRTY_FLAGS) ? analyze_from_scratch
+                                        : analyze_from_flags;
+      analyze( mat );
+   }
+
+   /* flags are re-read here because the analysis may have changed them */
+   if (mat->inv != 0 && (mat->flags & MAT_DIRTY_INVERSE) != 0) {
+      matrix_invert( mat );
+   }
+
+   mat->flags &= ~(MAT_DIRTY_FLAGS|
+                   MAT_DIRTY_TYPE|
+                   MAT_DIRTY_INVERSE);
+}
+
+
+/* Copy elements, flags and type from 'from' to 'to'. If the destination
+ * has an inverse buffer it is filled too: copied from the source when the
+ * source has one, recomputed from the copied matrix otherwise.
+ */
+void
+_math_matrix_copy( GLmatrix *to, const GLmatrix *from )
+{
+   MEMCPY( to->m, from->m, sizeof(Identity) );
+   to->type = from->type;
+   to->flags = from->flags;
+
+   if (to->inv == 0)
+      return;
+
+   if (from->inv != 0) {
+      MEMCPY(to->inv, from->inv, sizeof(GLfloat)*16);
+   }
+   else {
+      matrix_invert( to );
+   }
+}
+
+
+/* Post-multiply mat by a scale matrix: the first three columns are
+ * multiplied by x, y and z respectively. The scale is flagged as uniform
+ * when the three factors agree to within 1e-8, as general otherwise, and
+ * type and inverse are marked dirty.
+ */
+void
+_math_matrix_scale( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z )
+{
+   GLfloat *m = mat->m;
+   GLuint row;
+
+   for (row = 0; row < 4; row++) {
+      m[row]     *= x;
+      m[row + 4] *= y;
+      m[row + 8] *= z;
+   }
+
+   mat->flags |= (fabs(x - y) < 1e-8 && fabs(x - z) < 1e-8)
+                    ? MAT_FLAG_UNIFORM_SCALE
+                    : MAT_FLAG_GENERAL_SCALE;
+
+   mat->flags |= (MAT_DIRTY_TYPE |
+                  MAT_DIRTY_INVERSE);
+}
+
+
+/* Post-multiply mat by a translation matrix: only the fourth column
+ * changes, becoming column0*x + column1*y + column2*z + column3. The
+ * translation flag is set and type and inverse are marked dirty.
+ */
+void
+_math_matrix_translate( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z )
+{
+   GLfloat *m = mat->m;
+   GLuint row;
+
+   for (row = 0; row < 4; row++) {
+      m[12 + row] = m[row] * x + m[4 + row] * y + m[8 + row] * z + m[12 + row];
+   }
+
+   mat->flags |= (MAT_FLAG_TRANSLATION |
+                  MAT_DIRTY_TYPE |
+                  MAT_DIRTY_INVERSE);
+}
+
+
+/* Replace the matrix elements with the 16 floats at m. Nothing is known
+ * about the new contents, so the flags are reset to "general, everything
+ * dirty".
+ */
+void
+_math_matrix_loadf( GLmatrix *mat, const GLfloat *m )
+{
+   GLfloat *dst = mat->m;
+
+   MEMCPY( dst, m, sizeof(GLfloat) * 16 );
+   mat->flags = (MAT_FLAG_GENERAL | MAT_DIRTY);
+}
+
+/* Constructor: make *m an identity matrix with no inverse buffer.
+ *
+ * The element storage is allocated only when m->m is null, so m->m is
+ * read before it is written: the caller must hand in a structure whose
+ * m pointer is either null or a previously allocated buffer.
+ *
+ * If the allocation fails m->m is left null and the copy is skipped
+ * rather than writing through a null pointer.
+ */
+void
+_math_matrix_ctr( GLmatrix *m )
+{
+   if ( m->m == 0 ) {
+      m->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
+   }
+   if ( m->m != 0 ) {
+      MEMCPY( m->m, Identity, sizeof(Identity) );
+   }
+   m->inv = 0;
+   m->type = MATRIX_IDENTITY;
+   m->flags = 0;
+}
+
+/* Destructor: release the element and inverse buffers, if present, and
+ * null both pointers so the structure can be destroyed again or reused.
+ */
+void
+_math_matrix_dtr( GLmatrix *m )
+{
+   if ( m->m ) {
+      ALIGN_FREE( m->m );
+      m->m = 0;
+   }
+
+   if ( m->inv ) {
+      ALIGN_FREE( m->inv );
+      m->inv = 0;
+   }
+}
+
+
+/* Give the matrix an inverse buffer, initialised to the identity, if it
+ * does not have one yet. An existing buffer is left untouched.
+ *
+ * If the allocation fails m->inv stays null and the copy is skipped
+ * rather than writing through a null pointer.
+ */
+void
+_math_matrix_alloc_inv( GLmatrix *m )
+{
+   if ( m->inv == 0 ) {
+      m->inv = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
+      if ( m->inv != 0 ) {
+         MEMCPY( m->inv, Identity, 16 * sizeof(GLfloat) );
+      }
+   }
+}
+
+
+/* dest = a * b. The result's flags are the union of both operands' flags
+ * with type and inverse marked dirty; when that union stays within the
+ * 3D flags the cheaper 3x4 multiply is used. The underlying multiply
+ * routines allow dest to alias a but not b.
+ */
+void
+_math_matrix_mul_matrix( GLmatrix *dest, const GLmatrix *a, const GLmatrix *b )
+{
+   dest->flags = (a->flags |
+                  b->flags |
+                  MAT_DIRTY_TYPE |
+                  MAT_DIRTY_INVERSE);
+
+   if (!TEST_MAT_FLAGS(dest, MAT_FLAGS_3D)) {
+      matmul4( dest->m, a->m, b->m );
+      return;
+   }
+
+   matmul34( dest->m, a->m, b->m );
+}
+
+
+/* Post-multiply dest in place by the 16 floats at m. Nothing is assumed
+ * about m, so the result is flagged general and the full 4x4 multiply is
+ * always used.
+ */
+void
+_math_matrix_mul_floats( GLmatrix *dest, const GLfloat *m )
+{
+   GLfloat *elems = dest->m;
+
+   dest->flags |= (MAT_FLAG_GENERAL |
+                   MAT_DIRTY_TYPE |
+                   MAT_DIRTY_INVERSE);
+
+   matmul4( elems, elems, m );
+}
+
+/* Reset the matrix, and its inverse when one is allocated, to the
+ * identity. The type becomes MATRIX_IDENTITY and the three dirty bits are
+ * cleared; every other flag bit is left as it was.
+ */
+void
+_math_matrix_set_identity( GLmatrix *mat )
+{
+   MEMCPY( mat->m, Identity, sizeof(GLfloat) * 16 );
+
+   if (mat->inv != 0) {
+      MEMCPY( mat->inv, Identity, sizeof(GLfloat) * 16 );
+   }
+
+   mat->flags &= ~(MAT_DIRTY_FLAGS|
+                   MAT_DIRTY_TYPE|
+                   MAT_DIRTY_INVERSE);
+   mat->type = MATRIX_IDENTITY;
+}
+
+
+
+/* Write the transpose of the 4x4 float matrix 'from' into 'to'.
+ * Elements are stored in ascending order of destination index; the two
+ * arrays are expected to be distinct.
+ */
+void
+_math_transposef( GLfloat to[16], const GLfloat from[16] )
+{
+   int i, j;
+
+   for (i = 0; i < 4; i++) {
+      for (j = 0; j < 4; j++) {
+         to[i * 4 + j] = from[j * 4 + i];
+      }
+   }
+}
+
+
+/* Write the transpose of the 4x4 double matrix 'from' into 'to'.
+ * Elements are stored in ascending order of destination index; the two
+ * arrays are expected to be distinct.
+ */
+void
+_math_transposed( GLdouble to[16], const GLdouble from[16] )
+{
+   int i, j;
+
+   for (i = 0; i < 4; i++) {
+      for (j = 0; j < 4; j++) {
+         to[i * 4 + j] = from[j * 4 + i];
+      }
+   }
+}
+
+/* Write the transpose of the 4x4 double matrix 'from' into the float
+ * matrix 'to'; each element is converted from GLdouble to GLfloat by the
+ * assignment. Elements are stored in ascending order of destination
+ * index.
+ */
+void
+_math_transposefd( GLfloat to[16], const GLdouble from[16] )
+{
+   int i, j;
+
+   for (i = 0; i < 4; i++) {
+      for (j = 0; j < 4; j++) {
+         to[i * 4 + j] = from[j * 4 + i];
+      }
+   }
+}
diff --git a/src/mesa/math/m_matrix.h b/src/mesa/math/m_matrix.h
new file mode 100644
index 0000000000..8eedbdb942
--- /dev/null
+++ b/src/mesa/math/m_matrix.h
@@ -0,0 +1,176 @@
+/* $Id: m_matrix.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _M_MATRIX_H
+#define _M_MATRIX_H
+
+
+
+/* Give symbolic names to some of the entries in the matrix to help
+ * out with the rework of the viewport_map as a matrix transform.
+ *
+ * Each value is an index into a 16-element GLfloat matrix array such
+ * as GLmatrix.m: SX/SY/SZ name elements 0, 5 and 10 (the diagonal of
+ * the upper-left 3x3) and TX/TY/TZ name elements 12, 13 and 14.
+ * NOTE(review): the names suggest scale and translation terms --
+ * confirm against the viewport code that uses them.
+ */
+#define MAT_SX 0
+#define MAT_SY 5
+#define MAT_SZ 10
+#define MAT_TX 12
+#define MAT_TY 13
+#define MAT_TZ 14
+
+/*
+ * Different kinds of 4x4 transformation matrices.
+ * One of these values is stored in GLmatrix.type.
+ */
+#define MATRIX_GENERAL 0 /* general 4x4 matrix */
+#define MATRIX_IDENTITY 1 /* identity matrix */
+#define MATRIX_3D_NO_ROT 2 /* ortho projection and others... */
+#define MATRIX_PERSPECTIVE 3 /* perspective projection matrix */
+#define MATRIX_2D 4 /* 2-D transformation */
+#define MATRIX_2D_NO_ROT 5 /* 2-D scale & translate only */
+#define MATRIX_3D 6 /* 3-D transformation */
+/* Bits stored in GLmatrix.flags.
+ *
+ * MAT_FLAG_GENERAL through MAT_FLAG_SINGULAR are the "geometry" bits
+ * gathered in MAT_FLAGS_GEOMETRY below; MAT_FLAG_IDENTITY is simply
+ * the absence of all of them.  MAT_DIRTY_TYPE and MAT_DIRTY_INVERSE
+ * are raised by the multiply routines in m_matrix.c, and all three
+ * MAT_DIRTY_ bits are cleared by _math_matrix_set_identity().
+ */
+#define MAT_FLAG_IDENTITY 0
+#define MAT_FLAG_GENERAL 0x1
+#define MAT_FLAG_ROTATION 0x2
+#define MAT_FLAG_TRANSLATION 0x4
+#define MAT_FLAG_UNIFORM_SCALE 0x8
+#define MAT_FLAG_GENERAL_SCALE 0x10
+#define MAT_FLAG_GENERAL_3D 0x20
+#define MAT_FLAG_PERSPECTIVE 0x40
+#define MAT_FLAG_SINGULAR 0x80
+#define MAT_DIRTY_TYPE 0x100
+#define MAT_DIRTY_FLAGS 0x200
+#define MAT_DIRTY_INVERSE 0x400
+/* Groups of geometry bits, usable as the second argument of
+ * TEST_MAT_FLAGS() (m_matrix.c does so with MAT_FLAGS_3D).
+ */
+#define MAT_FLAGS_ANGLE_PRESERVING (MAT_FLAG_ROTATION | \
+ MAT_FLAG_TRANSLATION | \
+ MAT_FLAG_UNIFORM_SCALE)
+
+#define MAT_FLAGS_LENGTH_PRESERVING (MAT_FLAG_ROTATION | \
+ MAT_FLAG_TRANSLATION)
+
+#define MAT_FLAGS_3D (MAT_FLAG_ROTATION | \
+ MAT_FLAG_TRANSLATION | \
+ MAT_FLAG_UNIFORM_SCALE | \
+ MAT_FLAG_GENERAL_SCALE | \
+ MAT_FLAG_GENERAL_3D)
+
+#define MAT_FLAGS_GEOMETRY (MAT_FLAG_GENERAL | \
+ MAT_FLAG_ROTATION | \
+ MAT_FLAG_TRANSLATION | \
+ MAT_FLAG_UNIFORM_SCALE | \
+ MAT_FLAG_GENERAL_SCALE | \
+ MAT_FLAG_GENERAL_3D | \
+ MAT_FLAG_PERSPECTIVE | \
+ MAT_FLAG_SINGULAR)
+/* All three dirty bits together. */
+#define MAT_DIRTY (MAT_DIRTY_TYPE | \
+ MAT_DIRTY_FLAGS | \
+ MAT_DIRTY_INVERSE)
+/* Non-zero when every geometry bit set in mat->flags is also present
+ * in the mask a, i.e. the matrix has no geometry property outside a.
+ * The MAT_DIRTY_ bits are ignored by the test.
+ */
+#define TEST_MAT_FLAGS(mat, a) \
+ ((MAT_FLAGS_GEOMETRY & (~(a)) & ((mat)->flags) ) == 0)
+
+/* A 4x4 float matrix plus cached classification data. */
+typedef struct {
+ GLfloat *m; /* the 16 matrix elements; 16-byte aligned */
+ GLfloat *inv; /* optional (may be NULL), 16 elements, 16-byte aligned */
+ GLuint flags; /* MAT_FLAG_ and MAT_DIRTY_ bits */
+ GLuint type; /* one of the MATRIX_* values */
+} GLmatrix;
+
+
+
+/* Operations on GLmatrix objects.
+ * NOTE(review): only some of the definitions are visible alongside
+ * this header; the routines without a comment below are documented by
+ * their prototypes alone.
+ */
+extern void
+_math_matrix_ctr( GLmatrix *m );
+
+extern void
+_math_matrix_dtr( GLmatrix *m );
+
+extern void
+_math_matrix_alloc_inv( GLmatrix *m );
+/* Presumably stores the product of a and b in dest -- confirm operand
+ * order against the definition in m_matrix.c.
+ */
+extern void
+_math_matrix_mul_matrix( GLmatrix *dest, const GLmatrix *a, const GLmatrix *b );
+/* Multiplies dest in place by the raw 16-float matrix b; dest is then
+ * flagged MAT_FLAG_GENERAL with its type and inverse marked dirty.
+ */
+extern void
+_math_matrix_mul_floats( GLmatrix *dest, const GLfloat *b );
+
+extern void
+_math_matrix_loadf( GLmatrix *mat, const GLfloat *m );
+
+extern void
+_math_matrix_translate( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z );
+
+extern void
+_math_matrix_rotate( GLmatrix *m, GLfloat angle,
+ GLfloat x, GLfloat y, GLfloat z );
+
+extern void
+_math_matrix_scale( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z );
+
+extern void
+_math_matrix_ortho( GLmatrix *mat,
+ GLfloat left, GLfloat right,
+ GLfloat bottom, GLfloat top,
+ GLfloat nearval, GLfloat farval );
+/* NOTE(review): "frustrum" is a misspelling of "frustum"; the name is
+ * part of the interface, so it is left unchanged here.
+ */
+extern void
+_math_matrix_frustrum( GLmatrix *mat,
+ GLfloat left, GLfloat right,
+ GLfloat bottom, GLfloat top,
+ GLfloat nearval, GLfloat farval );
+/* Loads the identity into dest->m (and dest->inv when allocated), sets
+ * the type to MATRIX_IDENTITY and clears the MAT_DIRTY_ bits.
+ */
+extern void
+_math_matrix_set_identity( GLmatrix *dest );
+
+extern void
+_math_matrix_copy( GLmatrix *to, const GLmatrix *from );
+
+extern void
+_math_matrix_analyze( GLmatrix *mat );
+
+extern void
+_math_matrix_print( const GLmatrix *m );
+
+
+
+
+/* Related functions that don't actually operate on GLmatrix structs:
+ *
+ * Each writes the transpose of the 16-element matrix `from` into `to`.
+ * The suffix gives the element types: f = float to float, d = double
+ * to double, fd = double source narrowed into a float destination.
+ * `to` and `from` should be different arrays.
+ */
+extern void
+_math_transposef( GLfloat to[16], const GLfloat from[16] );
+
+extern void
+_math_transposed( GLdouble to[16], const GLdouble from[16] );
+
+extern void
+_math_transposefd( GLfloat to[16], const GLdouble from[16] );
+
+
+
+
+#endif
diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h
new file mode 100644
index 0000000000..72770c2075
--- /dev/null
+++ b/src/mesa/math/m_norm_tmp.h
@@ -0,0 +1,413 @@
+/* $Id: m_norm_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.3
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/*
+ * Transform normals by mat->inv and rescale them to unit length.
+ *
+ * Each output component is the dot product of the input normal with
+ * three consecutive elements of mat->inv (0-2, 4-6 and 8-10).
+ *
+ * mat     - matrix whose inv array supplies the nine coefficients;
+ *           inv must be non-NULL.
+ * scale   - only used when lengths is given: if it differs from 1.0 it
+ *           is folded into the coefficients before the loop.
+ * in      - source normals (start, stride and count are read).
+ * lengths - optional per-normal factors.  When non-NULL, each
+ *           transformed normal is multiplied by lengths[i] (the entries
+ *           act as reciprocal lengths) and no square root is taken.
+ *           When NULL, the length is computed here and results whose
+ *           squared length is not above 1e-20 become (0, 0, 0).
+ * mask    - not referenced directly in this function.
+ *           NOTE(review): CULL_CHECK is defined by the including file
+ *           and may test it -- confirm there.
+ * dest    - receives the normals as packed GLfloat[3] entries;
+ *           dest->count is set to in->count.
+ *
+ * STRIDE_LOOP is likewise supplied by the including file; it is
+ * expected to iterate i over the input and advance `from` -- confirm
+ * against its definition.
+ */
+static void _XFORMAPI
+TAG(transform_normalize_normals)( const GLmatrix *mat,
+                                  GLfloat scale,
+                                  const GLvector3f *in,
+                                  const GLfloat *lengths,
+                                  const GLubyte mask[],
+                                  GLvector3f *dest )
+{
+   GLuint i;
+   const GLfloat *from = in->start;
+   GLuint stride = in->stride;
+   GLuint count = in->count;
+   GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+   const GLfloat *m = mat->inv;
+   GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
+   GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
+   GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
+
+   (void) mask;
+   if (!lengths) {
+      STRIDE_LOOP {
+         CULL_CHECK {
+            GLfloat tx, ty, tz;
+            {
+               const GLfloat ux = from[0],  uy = from[1],  uz = from[2];
+               tx = ux * m0 + uy * m1 + uz * m2;
+               ty = ux * m4 + uy * m5 + uz * m6;
+               tz = ux * m8 + uy * m9 + uz * m10;
+            }
+            {
+               GLdouble len = tx*tx + ty*ty + tz*tz;
+               if (len > 1e-20) {
+                  /* Named so that it no longer shadows the `scale`
+                   * parameter.
+                   */
+                  GLdouble inv_len = 1.0 / GL_SQRT(len);
+                  out[i][0] = (GLfloat) (tx * inv_len);
+                  out[i][1] = (GLfloat) (ty * inv_len);
+                  out[i][2] = (GLfloat) (tz * inv_len);
+               }
+               else
+               {
+                  out[i][0] = out[i][1] = out[i][2] = 0;
+               }
+            }
+         }
+      }
+   }
+   else {
+      /* scale has been snapped to 1.0 if it is close.
+       */
+      if (scale != 1.0) {
+         m0 *= scale,  m4 *= scale,  m8 *= scale;
+         m1 *= scale,  m5 *= scale,  m9 *= scale;
+         m2 *= scale,  m6 *= scale,  m10 *= scale;
+      }
+
+      STRIDE_LOOP {
+         CULL_CHECK {
+            GLfloat tx, ty, tz;
+            {
+               const GLfloat ux = from[0],  uy = from[1],  uz = from[2];
+               tx = ux * m0 + uy * m1 + uz * m2;
+               ty = ux * m4 + uy * m5 + uz * m6;
+               tz = ux * m8 + uy * m9 + uz * m10;
+            }
+            {
+               /* lengths[] is used as a multiplier, as in
+                * normalize_normals.
+                */
+               GLfloat invlen = lengths[i];
+               out[i][0] = tx * invlen;
+               out[i][1] = ty * invlen;
+               out[i][2] = tz * invlen;
+            }
+         }
+      }
+   }
+   dest->count = in->count;
+}
+
+
+/*
+ * Transform normals by the diagonal of mat->inv (elements 0, 5 and 10
+ * only) and rescale them to unit length.
+ *
+ * mat     - matrix whose inv array supplies the three coefficients;
+ *           inv must be non-NULL.
+ * scale   - only used when lengths is given: if it differs from 1.0 it
+ *           is folded into the coefficients before the loop.
+ * in      - source normals (start, stride and count are read).
+ * lengths - optional per-normal factors.  When non-NULL, each
+ *           transformed normal is multiplied by lengths[i] (the entries
+ *           act as reciprocal lengths) and no square root is taken.
+ *           When NULL, the length is computed here and results whose
+ *           squared length is not above 1e-20 become (0, 0, 0).
+ * mask    - not referenced directly in this function.
+ *           NOTE(review): CULL_CHECK is defined by the including file
+ *           and may test it -- confirm there.
+ * dest    - receives the normals as packed GLfloat[3] entries;
+ *           dest->count is set to in->count.
+ *
+ * STRIDE_LOOP is likewise supplied by the including file; it is
+ * expected to iterate i over the input and advance `from` -- confirm
+ * against its definition.
+ */
+static void _XFORMAPI
+TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat,
+                                         GLfloat scale,
+                                         const GLvector3f *in,
+                                         const GLfloat *lengths,
+                                         const GLubyte mask[],
+                                         GLvector3f *dest )
+{
+   GLuint i;
+   const GLfloat *from = in->start;
+   GLuint stride = in->stride;
+   GLuint count = in->count;
+   GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+   const GLfloat *m = mat->inv;
+   GLfloat m0 = m[0];
+   GLfloat m5 = m[5];
+   GLfloat m10 = m[10];
+   (void) mask;
+   if (!lengths) {
+      STRIDE_LOOP {
+         CULL_CHECK {
+            GLfloat tx, ty, tz;
+            {
+               const GLfloat ux = from[0],  uy = from[1],  uz = from[2];
+               tx = ux * m0 ;
+               ty = uy * m5 ;
+               tz = uz * m10;
+            }
+            {
+               GLdouble len = tx*tx + ty*ty + tz*tz;
+               if (len > 1e-20) {
+                  /* Named so that it no longer shadows the `scale`
+                   * parameter.
+                   */
+                  GLdouble inv_len = 1.0 / GL_SQRT(len);
+                  out[i][0] = (GLfloat) (tx * inv_len);
+                  out[i][1] = (GLfloat) (ty * inv_len);
+                  out[i][2] = (GLfloat) (tz * inv_len);
+               }
+               else
+               {
+                  out[i][0] = out[i][1] = out[i][2] = 0;
+               }
+            }
+         }
+      }
+   }
+   else {
+      /* scale has been snapped to 1.0 if it is close.
+       */
+      if (scale != 1.0) {
+         m0 *= scale;
+         m5 *= scale;
+         m10 *= scale;
+      }
+
+      STRIDE_LOOP {
+         CULL_CHECK {
+            GLfloat tx, ty, tz;
+            {
+               const GLfloat ux = from[0],  uy = from[1],  uz = from[2];
+               tx = ux * m0 ;
+               ty = uy * m5 ;
+               tz = uz * m10;
+            }
+            {
+               /* lengths[] is used as a multiplier, as in
+                * normalize_normals.
+                */
+               GLfloat invlen = lengths[i];
+               out[i][0] = tx * invlen;
+               out[i][1] = ty * invlen;
+               out[i][2] = tz * invlen;
+            }
+         }
+      }
+   }
+   dest->count = in->count;
+}
+
+/* Multiply each normal component-wise by scale times the diagonal of
+ * mat->inv (elements 0, 5 and 10); the other matrix elements are
+ * ignored and the result is not renormalized.
+ *
+ * lengths is unused and mask is not referenced directly here.
+ * NOTE(review): STRIDE_LOOP and CULL_CHECK come from the including
+ * file; they presumably drive i / from and may consult mask -- confirm.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(transform_rescale_normals_no_rot)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ const GLfloat *m = mat->inv;
+ GLfloat m0 = scale*m[0]; /* scale is folded in once, outside the loop */
+ GLfloat m5 = scale*m[5];
+ GLfloat m10 = scale*m[10];
+ (void) lengths;
+ (void) mask;
+ STRIDE_LOOP {
+ CULL_CHECK {
+ GLfloat ux = from[0], uy = from[1], uz = from[2];
+ out[i][0] = ux * m0;
+ out[i][1] = uy * m5;
+ out[i][2] = uz * m10;
+ }
+ }
+ dest->count = in->count;
+}
+/* Transform each normal by scale times the nine mat->inv elements
+ * 0-2, 4-6 and 8-10; the result is not renormalized.
+ *
+ * lengths is unused and mask is not referenced directly here.
+ * NOTE(review): STRIDE_LOOP and CULL_CHECK come from the including
+ * file; they presumably drive i / from and may consult mask -- confirm.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(transform_rescale_normals)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ /* Since we are unlikely to have < 3 vertices in the buffer,
+ * it makes sense to pre-multiply by scale.
+ */
+ const GLfloat *m = mat->inv;
+ GLfloat m0 = scale*m[0], m4 = scale*m[4], m8 = scale*m[8];
+ GLfloat m1 = scale*m[1], m5 = scale*m[5], m9 = scale*m[9];
+ GLfloat m2 = scale*m[2], m6 = scale*m[6], m10 = scale*m[10];
+ (void) lengths;
+ (void) mask;
+ STRIDE_LOOP {
+ CULL_CHECK {
+ GLfloat ux = from[0], uy = from[1], uz = from[2];
+ out[i][0] = ux * m0 + uy * m1 + uz * m2;
+ out[i][1] = ux * m4 + uy * m5 + uz * m6;
+ out[i][2] = ux * m8 + uy * m9 + uz * m10;
+ }
+ }
+ dest->count = in->count;
+}
+
+/* Multiply each normal component-wise by the diagonal of mat->inv
+ * (elements 0, 5 and 10).  No rescaling or normalization is applied.
+ *
+ * scale and lengths are unused; mask is not referenced directly here.
+ * NOTE(review): STRIDE_LOOP and CULL_CHECK come from the including
+ * file; they presumably drive i / from and may consult mask -- confirm.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(transform_normals_no_rot)(const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ const GLfloat *m = mat->inv;
+ GLfloat m0 = m[0];
+ GLfloat m5 = m[5];
+ GLfloat m10 = m[10];
+ (void) scale;
+ (void) lengths;
+ (void) mask;
+ STRIDE_LOOP {
+ CULL_CHECK {
+ GLfloat ux = from[0], uy = from[1], uz = from[2];
+ out[i][0] = ux * m0;
+ out[i][1] = uy * m5;
+ out[i][2] = uz * m10;
+ }
+ }
+ dest->count = in->count;
+}
+
+/* Transform each normal by the nine mat->inv elements 0-2, 4-6 and
+ * 8-10.  No rescaling or normalization is applied.
+ *
+ * scale and lengths are unused; mask is not referenced directly here.
+ * NOTE(review): STRIDE_LOOP and CULL_CHECK come from the including
+ * file; they presumably drive i / from and may consult mask -- confirm.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(transform_normals)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ const GLfloat *m = mat->inv;
+ GLfloat m0 = m[0], m4 = m[4], m8 = m[8];
+ GLfloat m1 = m[1], m5 = m[5], m9 = m[9];
+ GLfloat m2 = m[2], m6 = m[6], m10 = m[10];
+ (void) scale;
+ (void) lengths;
+ (void) mask;
+ STRIDE_LOOP {
+ CULL_CHECK {
+ GLfloat ux = from[0], uy = from[1], uz = from[2];
+ out[i][0] = ux * m0 + uy * m1 + uz * m2;
+ out[i][1] = ux * m4 + uy * m5 + uz * m6;
+ out[i][2] = ux * m8 + uy * m9 + uz * m10;
+ }
+ }
+ dest->count = in->count;
+}
+
+/* Normalize the input normals without transforming them.
+ *
+ * mat and scale are unused; mask is not referenced directly here.
+ * When lengths is non-NULL each normal is multiplied by lengths[i].
+ * Otherwise the reciprocal length is computed per normal; a normal
+ * whose squared length is not above 1e-50 is copied through unchanged.
+ * Note that this differs from the transform_normalize_normals
+ * variants in this file, which use a 1e-20 threshold and write zeros.
+ * NOTE(review): STRIDE_LOOP and CULL_CHECK come from the including
+ * file; they presumably drive i / from and may consult mask -- confirm.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(normalize_normals)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ (void) mat;
+ (void) mask;
+ (void) scale;
+ if (lengths) {
+ STRIDE_LOOP {
+ CULL_CHECK {
+ const GLfloat x = from[0], y = from[1], z = from[2];
+ GLfloat invlen = lengths[i];
+ out[i][0] = x * invlen;
+ out[i][1] = y * invlen;
+ out[i][2] = z * invlen;
+ }
+ }
+ }
+ else {
+ STRIDE_LOOP {
+ CULL_CHECK {
+ const GLfloat x = from[0], y = from[1], z = from[2];
+ GLdouble len = x * x + y * y + z * z;
+ if (len > 1e-50) {
+ len = 1.0 / GL_SQRT(len); /* len now holds the reciprocal length */
+ out[i][0] = (GLfloat) (x * len);
+ out[i][1] = (GLfloat) (y * len);
+ out[i][2] = (GLfloat) (z * len);
+ }
+ else { /* too short to normalize: pass the input through */
+ out[i][0] = x;
+ out[i][1] = y;
+ out[i][2] = z;
+ }
+ }
+ }
+ }
+ dest->count = in->count;
+}
+
+/* Rescale the input normals by `scale` without transforming them.
+ *
+ * mat and lengths are unused; mask is not referenced directly here.
+ * The per-normal work is done by SCALE_SCALAR_3V, defined elsewhere.
+ * NOTE(review): presumably out[i] = scale * from for all three
+ * components -- confirm against the macro.  STRIDE_LOOP and CULL_CHECK
+ * also come from the including file.
+ * dest->count is set to in->count.
+ */
+static void _XFORMAPI
+TAG(rescale_normals)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat *lengths,
+ const GLubyte mask[],
+ GLvector3f *dest )
+{
+ GLuint i;
+ const GLfloat *from = in->start;
+ GLuint stride = in->stride;
+ GLuint count = in->count;
+ GLfloat (*out)[3] = (GLfloat (*)[3])dest->start;
+ (void) mat;
+ (void) lengths;
+ (void) mask;
+
+ STRIDE_LOOP {
+ CULL_CHECK {
+ SCALE_SCALAR_3V( out[i], scale, from );
+ }
+ }
+ dest->count = in->count;
+}
+
+/* Register the eight normal-transform variants generated by this
+ * template in gl_normal_tab.  The first index is a combination of the
+ * NORM_ flags naming the operation; the second is IDX, supplied by
+ * the including file.
+ */
+static void _XFORMAPI
+TAG(init_c_norm_transform)( void )
+{
+ gl_normal_tab[NORM_TRANSFORM_NO_ROT][IDX] =
+ TAG(transform_normals_no_rot);
+
+ gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][IDX] =
+ TAG(transform_rescale_normals_no_rot);
+
+ gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][IDX] =
+ TAG(transform_normalize_normals_no_rot);
+
+ gl_normal_tab[NORM_TRANSFORM][IDX] =
+ TAG(transform_normals);
+
+ gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][IDX] =
+ TAG(transform_rescale_normals);
+
+ gl_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][IDX] =
+ TAG(transform_normalize_normals);
+
+ gl_normal_tab[NORM_RESCALE][IDX] =
+ TAG(rescale_normals);
+
+ gl_normal_tab[NORM_NORMALIZE][IDX] =
+ TAG(normalize_normals);
+}
diff --git a/src/mesa/math/m_trans_tmp.h b/src/mesa/math/m_trans_tmp.h
new file mode 100644
index 0000000000..952fde5594
--- /dev/null
+++ b/src/mesa/math/m_trans_tmp.h
@@ -0,0 +1,210 @@
+/* $Id: m_trans_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/* KW: This file also included by tnl/trans_elt.c to build code
+ * specific to the implementation of array-elements in the
+ * tnl module.
+ */
+
+
+#ifdef DEST_4F
+/*
+ * Template: convert source elements of SZ components each into rows of
+ * GLfloat[4].
+ *
+ * The including file supplies DEST_4F (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START (first source element / first destination row), CHECK
+ * (per-element guard), NEXT_F and NEXT_F2 (source pointer stepping)
+ * and TRX_4F (component conversion).
+ *
+ * Only the first SZ components of each row are written; the remaining
+ * components of t[i] keep whatever they held before.
+ */
+static void DEST_4F( GLfloat (*t)[4],
+                     CONST void *ptr,
+                     GLuint stride,
+                     ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+
+   (void) first;
+   (void) start;
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         if (SZ >= 1) t[i][0] = TRX_4F(f, 0);
+         if (SZ >= 2) t[i][1] = TRX_4F(f, 1);
+         if (SZ >= 3) t[i][2] = TRX_4F(f, 2);
+         if (SZ == 4) t[i][3] = TRX_4F(f, 3);
+      }
+   }
+}
+#endif
+
+
+#ifdef DEST_3F
+/*
+ * Template: convert three-component source elements into rows of
+ * GLfloat[3] using TRX_3F.
+ *
+ * The including file supplies DEST_3F (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START, CHECK, NEXT_F and NEXT_F2.
+ */
+static void DEST_3F( GLfloat (*t)[3],
+                     CONST void *ptr,
+                     GLuint stride,
+                     ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+   (void) first;
+   (void) start;
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         t[i][0] = TRX_3F(f, 0);
+         t[i][1] = TRX_3F(f, 1);
+         t[i][2] = TRX_3F(f, 2);
+      }
+   }
+}
+#endif
+
+#ifdef DEST_1F
+/*
+ * Template: convert single-component source elements into a GLfloat
+ * array using TRX_1F.
+ *
+ * The including file supplies DEST_1F (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START, CHECK, NEXT_F and NEXT_F2.
+ */
+static void DEST_1F( GLfloat *t,
+                     CONST void *ptr,
+                     GLuint stride,
+                     ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+   (void) first;
+   (void) start;
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         t[i] = TRX_1F(f, 0);
+      }
+   }
+}
+#endif
+
+#ifdef DEST_4UB
+/*
+ * Template: convert source elements of SZ components each into rows of
+ * GLubyte[4] using TRX_UB.
+ *
+ * The including file supplies DEST_4UB (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START, CHECK, NEXT_F and NEXT_F2.
+ *
+ * The fourth component is converted only when SZ is 4; for any other
+ * SZ it is set to 255.  Components 0-2 beyond SZ are left untouched.
+ */
+static void DEST_4UB( GLubyte (*t)[4],
+                      CONST void *ptr,
+                      GLuint stride,
+                      ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+   (void) start;
+   (void) first;
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         if (SZ >= 1) TRX_UB(t[i][0], f, 0);
+         if (SZ >= 2) TRX_UB(t[i][1], f, 1);
+         if (SZ >= 3) TRX_UB(t[i][2], f, 2);
+         if (SZ == 4) TRX_UB(t[i][3], f, 3); else t[i][3] = 255;
+      }
+   }
+}
+#endif
+
+
+#ifdef DEST_1UB
+/*
+ * Template: convert single-component source elements into a GLubyte
+ * array using TRX_UB.
+ *
+ * The including file supplies DEST_1UB (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START, CHECK, NEXT_F and NEXT_F2.
+ */
+static void DEST_1UB( GLubyte *t,
+                      CONST void *ptr,
+                      GLuint stride,
+                      ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+   (void) start;
+   (void) first;
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         TRX_UB(t[i], f, 0);
+      }
+   }
+}
+#endif
+
+
+#ifdef DEST_1UI
+/*
+ * Template: convert single-component source elements into a GLuint
+ * array using TRX_UI.
+ *
+ * The including file supplies DEST_1UI (the function name), ARGS (the
+ * trailing parameters, which must declare `start` and `n`), SRC_START
+ * and DST_START, CHECK, NEXT_F and NEXT_F2.
+ */
+static void DEST_1UI( GLuint *t,
+                      CONST void *ptr,
+                      GLuint stride,
+                      ARGS)
+{
+   /* Keep the const qualifier of ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) ptr + SRC_START * stride;
+   const GLubyte *first = f;
+   GLuint i;
+   (void) start;
+   (void) first;
+
+   for (i = DST_START ; i < n ; i++, NEXT_F) {
+      CHECK {
+         NEXT_F2;
+         t[i] = TRX_UI(f, 0);
+      }
+   }
+}
+#endif
+
+/* Template: store whichever converters this inclusion generated into
+ * the tables named by TAB().  The single-destination tables are
+ * indexed by SRC_IDX only and assert the SZ they require; the 4ub and
+ * 4f tables are indexed by [SZ][SRC_IDX].
+ */
+static void INIT(void)
+{
+#ifdef DEST_1UI
+ ASSERT(SZ == 1);
+ TAB(_1ui)[SRC_IDX] = DEST_1UI;
+#endif
+#ifdef DEST_1UB
+ ASSERT(SZ == 1);
+ TAB(_1ub)[SRC_IDX] = DEST_1UB;
+#endif
+#ifdef DEST_1F
+ ASSERT(SZ == 1);
+ TAB(_1f)[SRC_IDX] = DEST_1F;
+#endif
+#ifdef DEST_3F
+ ASSERT(SZ == 3);
+ TAB(_3f)[SRC_IDX] = DEST_3F;
+#endif
+#ifdef DEST_4UB
+ TAB(_4ub)[SZ][SRC_IDX] = DEST_4UB;
+#endif
+#ifdef DEST_4F
+ TAB(_4f)[SZ][SRC_IDX] = DEST_4F;
+#endif
+
+}
+
+/* Drop the per-inclusion parameters so the next inclusion can define
+ * its own.  Macros shared by every inclusion (TAB, ARGS, CHECK, the
+ * TRX_ converters, ...) are left alone.  TAG is not referenced by this
+ * template; undefining it is harmless.
+ */
+#undef INIT
+#undef DEST_1UI
+#undef DEST_1UB
+#undef DEST_4UB
+#undef DEST_3F
+#undef DEST_4F
+#undef DEST_1F
+#undef SZ
+#undef TAG
+
diff --git a/src/mesa/math/m_translate.c b/src/mesa/math/m_translate.c
new file mode 100644
index 0000000000..945e35706c
--- /dev/null
+++ b/src/mesa/math/m_translate.c
@@ -0,0 +1,478 @@
+/* $Id: m_translate.c,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.3
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+#include "glheader.h"
+#include "colormac.h"
+#include "mem.h"
+#include "mmath.h"
+
+#include "m_translate.h"
+
+/* This macro is used on other systems, so undefine it for this module.
+ * It is redefined below (as empty) for use by m_trans_tmp.h.
+ */
+
+#undef CHECK
+/* Converter dispatch tables, filled in by init_translate_raw().
+ * The last index is TYPE_IDX(source GL type).  The 4ub and 4f tables
+ * have an extra leading index: the number of source components (SZ,
+ * 1 to 4; slot 0 is never assigned in this file).
+ */
+trans_1f_func gl_trans_1f_tab[MAX_TYPES];
+trans_1ui_func gl_trans_1ui_tab[MAX_TYPES];
+trans_1ub_func gl_trans_1ub_tab[MAX_TYPES];
+trans_3f_func gl_trans_3f_tab[MAX_TYPES];
+trans_4ub_func gl_trans_4ub_tab[5][MAX_TYPES];
+trans_4f_func gl_trans_4f_tab[5][MAX_TYPES];
+
+/* Read component `elt` of the source element at ptr, viewed as the
+ * current SRC type.  SRC is expanded where PTR_ELT is used, so the
+ * macro follows each section's #define SRC.
+ * NOTE(review): the cast drops the const qualifier of the cursor and
+ * assumes ptr is suitably aligned for SRC -- confirm for strided data.
+ */
+#define PTR_ELT(ptr, elt) (((SRC *)ptr)[elt])
+
+/* Parameters shared by every inclusion of m_trans_tmp.h in this file:
+ * converters take (start, n), read from source element `start`, write
+ * destination rows from 0, step the source by `stride` bytes and apply
+ * no per-element check.
+ */
+#define TAB(x) gl_trans##x##_tab
+#define ARGS GLuint start, GLuint n
+#define SRC_START start
+#define DST_START 0
+#define STRIDE stride
+#define NEXT_F f += stride
+#define NEXT_F2
+#define CHECK
+
+
+
+
+/* GL_BYTE
+ *
+ * Converters from GLbyte sources.  3f destinations go through
+ * BYTE_TO_FLOAT, 4f destinations are a plain cast to GLfloat, ubyte
+ * destinations go through BYTE_TO_UBYTE and uint destinations clamp
+ * negative values to 0.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub and 1ui
+ * for SZ 1.
+ */
+#define SRC GLbyte
+#define SRC_IDX TYPE_IDX(GL_BYTE)
+#define TRX_3F(f,n) BYTE_TO_FLOAT( PTR_ELT(f,n) )
+#define TRX_4F(f,n) (GLfloat)( PTR_ELT(f,n) )
+#define TRX_UB(ub, f,n) ub = BYTE_TO_UBYTE( PTR_ELT(f,n) )
+#define TRX_UI(f,n) (PTR_ELT(f,n) < 0 ? 0 : (GLuint) PTR_ELT(f,n))
+
+
+#define SZ 4
+#define INIT init_trans_4_GLbyte_raw
+#define DEST_4F trans_4_GLbyte_4f_raw
+#define DEST_4UB trans_4_GLbyte_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLbyte_raw
+#define DEST_4F trans_3_GLbyte_4f_raw
+#define DEST_4UB trans_3_GLbyte_4ub_raw
+#define DEST_3F trans_3_GLbyte_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLbyte_raw
+#define DEST_4F trans_2_GLbyte_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLbyte_raw
+#define DEST_4F trans_1_GLbyte_4f_raw
+#define DEST_1UB trans_1_GLbyte_1ub_raw
+#define DEST_1UI trans_1_GLbyte_1ui_raw
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+#undef SRC_IDX
+
+/* GL_UNSIGNED_BYTE
+ *
+ * Converters from GLubyte sources.  No float destinations are
+ * generated, so TRX_3F and TRX_4F expand to nothing.  ubyte
+ * destinations copy the value and uint destinations widen it.
+ * Generated: 4ub for SZ 3, 1ui and 1ub for SZ 1.
+ */
+#define SRC GLubyte
+#define SRC_IDX TYPE_IDX(GL_UNSIGNED_BYTE)
+#define TRX_3F(f,n) /* unused */
+#define TRX_4F(f,n) /* unused */
+#define TRX_UB(ub, f,n) ub = PTR_ELT(f,n)
+#define TRX_UI(f,n) (GLuint)PTR_ELT(f,n)
+
+/* 4ub->4ub handled in special case below
+ * (trans_4_GLubyte_4ub_raw).
+ */
+
+#define SZ 3
+#define INIT init_trans_3_GLubyte_raw
+#define DEST_4UB trans_3_GLubyte_4ub_raw
+#include "m_trans_tmp.h"
+
+
+#define SZ 1
+#define INIT init_trans_1_GLubyte_raw
+#define DEST_1UI trans_1_GLubyte_1ui_raw
+#define DEST_1UB trans_1_GLubyte_1ub_raw
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+
+
+/* GL_SHORT
+ *
+ * Converters from GLshort sources.  3f destinations go through
+ * SHORT_TO_FLOAT, 4f destinations are a plain cast to GLfloat, ubyte
+ * destinations go through SHORT_TO_UBYTE and uint destinations clamp
+ * negative values to 0.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub and 1ui
+ * for SZ 1.
+ */
+#define SRC GLshort
+#define SRC_IDX TYPE_IDX(GL_SHORT)
+#define TRX_3F(f,n) SHORT_TO_FLOAT( PTR_ELT(f,n) )
+#define TRX_4F(f,n) (GLfloat)( PTR_ELT(f,n) )
+#define TRX_UB(ub, f,n) ub = SHORT_TO_UBYTE(PTR_ELT(f,n))
+#define TRX_UI(f,n) (PTR_ELT(f,n) < 0 ? 0 : (GLuint) PTR_ELT(f,n))
+
+
+#define SZ 4
+#define INIT init_trans_4_GLshort_raw
+#define DEST_4F trans_4_GLshort_4f_raw
+#define DEST_4UB trans_4_GLshort_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLshort_raw
+#define DEST_4F trans_3_GLshort_4f_raw
+#define DEST_4UB trans_3_GLshort_4ub_raw
+#define DEST_3F trans_3_GLshort_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLshort_raw
+#define DEST_4F trans_2_GLshort_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLshort_raw
+#define DEST_4F trans_1_GLshort_4f_raw
+#define DEST_1UB trans_1_GLshort_1ub_raw
+#define DEST_1UI trans_1_GLshort_1ui_raw
+#include "m_trans_tmp.h"
+
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+
+
+/* GL_UNSIGNED_SHORT
+ *
+ * Converters from GLushort sources.  3f destinations go through
+ * USHORT_TO_FLOAT, 4f destinations are a plain cast to GLfloat, ubyte
+ * destinations keep the high byte (value >> 8) and uint destinations
+ * widen the value.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub and 1ui
+ * for SZ 1.
+ */
+#define SRC GLushort
+#define SRC_IDX TYPE_IDX(GL_UNSIGNED_SHORT)
+#define TRX_3F(f,n) USHORT_TO_FLOAT( PTR_ELT(f,n) )
+#define TRX_4F(f,n) (GLfloat)( PTR_ELT(f,n) )
+#define TRX_UB(ub,f,n) ub = (GLubyte) (PTR_ELT(f,n) >> 8)
+#define TRX_UI(f,n) (GLuint) PTR_ELT(f,n)
+
+
+#define SZ 4
+#define INIT init_trans_4_GLushort_raw
+#define DEST_4F trans_4_GLushort_4f_raw
+#define DEST_4UB trans_4_GLushort_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLushort_raw
+#define DEST_4F trans_3_GLushort_4f_raw
+#define DEST_4UB trans_3_GLushort_4ub_raw
+#define DEST_3F trans_3_GLushort_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLushort_raw
+#define DEST_4F trans_2_GLushort_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLushort_raw
+#define DEST_4F trans_1_GLushort_4f_raw
+#define DEST_1UB trans_1_GLushort_1ub_raw
+#define DEST_1UI trans_1_GLushort_1ui_raw
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+
+
+/* GL_INT
+ *
+ * Converters from GLint sources.  3f destinations go through
+ * INT_TO_FLOAT, 4f destinations are a plain cast to GLfloat, ubyte
+ * destinations go through INT_TO_UBYTE and uint destinations clamp
+ * negative values to 0.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub and 1ui
+ * for SZ 1.
+ */
+#define SRC GLint
+#define SRC_IDX TYPE_IDX(GL_INT)
+#define TRX_3F(f,n) INT_TO_FLOAT( PTR_ELT(f,n) )
+#define TRX_4F(f,n) (GLfloat)( PTR_ELT(f,n) )
+#define TRX_UB(ub, f,n) ub = INT_TO_UBYTE(PTR_ELT(f,n))
+#define TRX_UI(f,n) (PTR_ELT(f,n) < 0 ? 0 : (GLuint) PTR_ELT(f,n))
+
+
+#define SZ 4
+#define INIT init_trans_4_GLint_raw
+#define DEST_4F trans_4_GLint_4f_raw
+#define DEST_4UB trans_4_GLint_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLint_raw
+#define DEST_4F trans_3_GLint_4f_raw
+#define DEST_4UB trans_3_GLint_4ub_raw
+#define DEST_3F trans_3_GLint_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLint_raw
+#define DEST_4F trans_2_GLint_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLint_raw
+#define DEST_4F trans_1_GLint_4f_raw
+#define DEST_1UB trans_1_GLint_1ub_raw
+#define DEST_1UI trans_1_GLint_1ui_raw
+#include "m_trans_tmp.h"
+
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+
+
+/* GL_UNSIGNED_INT
+ *
+ * Converters from GLuint sources.  3f destinations go through
+ * UINT_TO_FLOAT -- the unsigned counterpart of the macro used for
+ * GL_INT, matching how GL_UNSIGNED_SHORT uses USHORT_TO_FLOAT rather
+ * than the signed conversion.  4f destinations are a plain cast to
+ * GLfloat, ubyte destinations keep the high byte (value >> 24) and
+ * uint destinations copy the value.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub and 1ui
+ * for SZ 1.
+ */
+#define SRC GLuint
+#define SRC_IDX TYPE_IDX(GL_UNSIGNED_INT)
+#define TRX_3F(f,n) UINT_TO_FLOAT( PTR_ELT(f,n) )
+#define TRX_4F(f,n) (GLfloat)( PTR_ELT(f,n) )
+#define TRX_UB(ub, f,n) ub = (GLubyte) (PTR_ELT(f,n) >> 24)
+#define TRX_UI(f,n) PTR_ELT(f,n)
+
+
+#define SZ 4
+#define INIT init_trans_4_GLuint_raw
+#define DEST_4F trans_4_GLuint_4f_raw
+#define DEST_4UB trans_4_GLuint_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLuint_raw
+#define DEST_4F trans_3_GLuint_4f_raw
+#define DEST_4UB trans_3_GLuint_4ub_raw
+#define DEST_3F trans_3_GLuint_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLuint_raw
+#define DEST_4F trans_2_GLuint_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLuint_raw
+#define DEST_4F trans_1_GLuint_4f_raw
+#define DEST_1UB trans_1_GLuint_1ub_raw
+#define DEST_1UI trans_1_GLuint_1ui_raw
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+
+
+/* GL_DOUBLE
+ *
+ * Converters from GLdouble sources.  Float destinations take the
+ * value as is, ubyte destinations go through FLOAT_COLOR_TO_CHAN and
+ * uint destinations convert via GLint.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub, 1ui and
+ * 1f for SZ 1.
+ *
+ * Only SRC and SRC_IDX are undefined at the end of this section: the
+ * GL_FLOAT section that follows defines no TRX_ macros of its own and
+ * reuses these, with PTR_ELT picking up the new SRC type.
+ */
+#define SRC GLdouble
+#define SRC_IDX TYPE_IDX(GL_DOUBLE)
+#define TRX_3F(f,n) PTR_ELT(f,n)
+#define TRX_4F(f,n) PTR_ELT(f,n)
+#define TRX_UB(ub,f,n) FLOAT_COLOR_TO_CHAN(ub, PTR_ELT(f,n))
+#define TRX_UI(f,n) (GLuint) (GLint) PTR_ELT(f,n)
+#define TRX_1F(f,n) PTR_ELT(f,n)
+
+
+#define SZ 4
+#define INIT init_trans_4_GLdouble_raw
+#define DEST_4F trans_4_GLdouble_4f_raw
+#define DEST_4UB trans_4_GLdouble_4ub_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLdouble_raw
+#define DEST_4F trans_3_GLdouble_4f_raw
+#define DEST_4UB trans_3_GLdouble_4ub_raw
+#define DEST_3F trans_3_GLdouble_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLdouble_raw
+#define DEST_4F trans_2_GLdouble_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLdouble_raw
+#define DEST_4F trans_1_GLdouble_4f_raw
+#define DEST_1UB trans_1_GLdouble_1ub_raw
+#define DEST_1UI trans_1_GLdouble_1ui_raw
+#define DEST_1F trans_1_GLdouble_1f_raw
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef SRC_IDX
+
+/* GL_FLOAT
+ *
+ * Converters from GLfloat sources.  No TRX_ macros are defined here:
+ * the ones left defined by the GL_DOUBLE section are reused, with
+ * PTR_ELT now reading GLfloat elements.
+ * Generated: 4f for SZ 1-4, 4ub for SZ 3-4, 3f for SZ 3, 1ub, 1ui and
+ * 1f for SZ 1.
+ *
+ * This is the last user of the TRX_ macros, so all of them -- including
+ * TRX_1F -- are undefined at the end.
+ */
+#define SRC GLfloat
+#define SRC_IDX TYPE_IDX(GL_FLOAT)
+#define SZ 4
+#define INIT init_trans_4_GLfloat_raw
+#define DEST_4UB trans_4_GLfloat_4ub_raw
+#define DEST_4F trans_4_GLfloat_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 3
+#define INIT init_trans_3_GLfloat_raw
+#define DEST_4F trans_3_GLfloat_4f_raw
+#define DEST_4UB trans_3_GLfloat_4ub_raw
+#define DEST_3F trans_3_GLfloat_3f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 2
+#define INIT init_trans_2_GLfloat_raw
+#define DEST_4F trans_2_GLfloat_4f_raw
+#include "m_trans_tmp.h"
+
+#define SZ 1
+#define INIT init_trans_1_GLfloat_raw
+#define DEST_4F trans_1_GLfloat_4f_raw
+#define DEST_1UB trans_1_GLfloat_1ub_raw
+#define DEST_1UI trans_1_GLfloat_1ui_raw
+#define DEST_1F trans_1_GLfloat_1f_raw
+
+#include "m_trans_tmp.h"
+
+#undef SRC
+#undef SRC_IDX
+#undef TRX_3F
+#undef TRX_4F
+#undef TRX_UB
+#undef TRX_UI
+#undef TRX_1F
+
+
+/*
+ * Copy n four-component GLubyte elements into rows of GLubyte[4].
+ *
+ * This is the hand-written 4ub -> 4ub case that the GL_UNSIGNED_BYTE
+ * template section leaves out.  Reading starts at source element
+ * SRC_START and advances `stride` bytes per element; rows DST_START
+ * to n-1 of t are written.
+ *
+ * When both the source address and the stride are multiples of four,
+ * each element is copied with COPY_4UBV; otherwise it is copied byte
+ * by byte.
+ * NOTE(review): COPY_4UBV is defined elsewhere and presumably copies
+ * all four bytes in one word-sized access, which would also rely on t
+ * being suitably aligned -- confirm.
+ */
+static void trans_4_GLubyte_4ub_raw (GLubyte (*t)[4],
+                                     CONST void *Ptr,
+                                     GLuint stride,
+                                     ARGS )
+{
+   /* Keep the const qualifier of Ptr when forming the byte cursor. */
+   const GLubyte *f = (const GLubyte *) Ptr + SRC_START * stride;
+   GLuint i;
+
+   if (((((long) f | (long) stride)) & 3L) == 0L) {
+      /* Aligned.
+       */
+      for (i = DST_START ; i < n ; i++, f += stride) {
+         COPY_4UBV( t[i], f );
+      }
+   } else {
+      for (i = DST_START ; i < n ; i++, f += stride) {
+         t[i][0] = f[0];
+         t[i][1] = f[1];
+         t[i][2] = f[2];
+         t[i][3] = f[3];
+      }
+   }
+}
+
+
+/*
+ * Fill the converter dispatch tables with the "raw" converters
+ * generated above.
+ *
+ * Every table is cleared first, so combinations without a converter
+ * read as NULL.  gl_trans_1f_tab is cleared along with the rest; it is
+ * written by the GLdouble and GLfloat SZ 1 initialisers.  The
+ * hand-written 4ub -> 4ub converter is registered directly, and each
+ * generated init_trans_<SZ>_<type>_raw() then stores its own entries.
+ */
+static void init_translate_raw(void)
+{
+   MEMSET( TAB(_1ui), 0, sizeof(TAB(_1ui)) );
+   MEMSET( TAB(_1ub), 0, sizeof(TAB(_1ub)) );
+   MEMSET( TAB(_1f),  0, sizeof(TAB(_1f)) );
+   MEMSET( TAB(_3f),  0, sizeof(TAB(_3f)) );
+   MEMSET( TAB(_4ub), 0, sizeof(TAB(_4ub)) );
+   MEMSET( TAB(_4f),  0, sizeof(TAB(_4f)) );
+
+   TAB(_4ub)[4][TYPE_IDX(GL_UNSIGNED_BYTE)] = trans_4_GLubyte_4ub_raw;
+
+   init_trans_4_GLbyte_raw();
+   init_trans_3_GLbyte_raw();
+   init_trans_2_GLbyte_raw();
+   init_trans_1_GLbyte_raw();
+   init_trans_1_GLubyte_raw();
+   init_trans_3_GLubyte_raw();
+   init_trans_4_GLshort_raw();
+   init_trans_3_GLshort_raw();
+   init_trans_2_GLshort_raw();
+   init_trans_1_GLshort_raw();
+   init_trans_4_GLushort_raw();
+   init_trans_3_GLushort_raw();
+   init_trans_2_GLushort_raw();
+   init_trans_1_GLushort_raw();
+   init_trans_4_GLint_raw();
+   init_trans_3_GLint_raw();
+   init_trans_2_GLint_raw();
+   init_trans_1_GLint_raw();
+   init_trans_4_GLuint_raw();
+   init_trans_3_GLuint_raw();
+   init_trans_2_GLuint_raw();
+   init_trans_1_GLuint_raw();
+   init_trans_4_GLdouble_raw();
+   init_trans_3_GLdouble_raw();
+   init_trans_2_GLdouble_raw();
+   init_trans_1_GLdouble_raw();
+   init_trans_4_GLfloat_raw();
+   init_trans_3_GLfloat_raw();
+   init_trans_2_GLfloat_raw();
+   init_trans_1_GLfloat_raw();
+}
+
+
+/* Drop the template parameter macros defined near the top of this
+ * file, now that the last converter has been generated.  STRIDE and
+ * PTR_ELT are included so that none of them outlives its use.  CLASS
+ * is not defined anywhere in this file; undefining it is harmless.
+ */
+#undef TAB
+#undef CLASS
+#undef ARGS
+#undef CHECK
+#undef SRC_START
+#undef DST_START
+#undef STRIDE
+#undef NEXT_F
+#undef NEXT_F2
+#undef PTR_ELT
+
+
+
+/* Public entry point: populate the converter dispatch tables by
+ * running init_translate_raw().
+ */
+void
+_math_init_translate( void )
+{
+ init_translate_raw();
+}
diff --git a/src/mesa/math/m_translate.h b/src/mesa/math/m_translate.h
new file mode 100644
index 0000000000..5c60b972ff
--- /dev/null
+++ b/src/mesa/math/m_translate.h
@@ -0,0 +1,92 @@
+/* $Id: m_translate.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _M_TRANSLATE_H_
+#define _M_TRANSLATE_H_
+
+
+/* Function-pointer types stored in the translation tables declared
+ * below.  All share one parameter list; only the destination type
+ * (<to>) differs: a flat array for the 1-component variants, an array
+ * of 3- or 4-element rows for the others.
+ * NOTE(review): <ptr>/<stride> look like the source array and its byte
+ * stride, <start>/<n> like the first element and the element limit --
+ * confirm against the templates in m_trans_tmp.h.
+ */
+typedef void (*trans_1f_func)(GLfloat *to,
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+typedef void (*trans_1ui_func)(GLuint *to,
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+typedef void (*trans_1ub_func)(GLubyte *to,
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+typedef void (*trans_4ub_func)(GLubyte (*to)[4],
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+typedef void (*trans_4f_func)(GLfloat (*to)[4],
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+typedef void (*trans_3f_func)(GLfloat (*to)[3],
+ CONST void *ptr,
+ GLuint stride,
+ GLuint start,
+ GLuint n );
+
+
+
+
+/* Translate GL_UNSIGNED_BYTE, etc to the indexes used in the arrays
+ * below.
+ */
+#define TYPE_IDX(t) ((t) & 0xf)
+
+/* Number of slots needed to index a table by TYPE_IDX().  The expansion
+ * is parenthesized so it stays a single operand inside any expression
+ * (e.g. MAX_TYPES * 2).
+ */
+#define MAX_TYPES (TYPE_IDX(GL_DOUBLE)+1) /* 0xa + 1 */
+
+/* Only useful combinations are defined, thus there is no function to
+ * translate eg, ubyte->float or ubyte->ubyte, which are never used.
+ */
+extern trans_1f_func gl_trans_1f_tab[MAX_TYPES];
+extern trans_1ui_func gl_trans_1ui_tab[MAX_TYPES];
+extern trans_1ub_func gl_trans_1ub_tab[MAX_TYPES];
+extern trans_3f_func gl_trans_3f_tab[MAX_TYPES];
+extern trans_4ub_func gl_trans_4ub_tab[5][MAX_TYPES];
+extern trans_4f_func gl_trans_4f_tab[5][MAX_TYPES];
+
+
+extern void gl_init_translate( void );
+
+/* Table initialisation entry point defined in m_translate.c. */
+extern void _math_init_translate( void );
+
+
+#endif
diff --git a/src/mesa/math/m_vector.c b/src/mesa/math/m_vector.c
new file mode 100644
index 0000000000..4dbf68f8e0
--- /dev/null
+++ b/src/mesa/math/m_vector.c
@@ -0,0 +1,367 @@
+/* $Id: m_vector.c,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "mem.h"
+
+#include "m_vector.h"
+
+
+
+/*
+ * Reset one column of a [count][4] float vector to its "clean" value:
+ * 0 for elt 0, 1 or 2 and 1.0 for elt 3.  The matching VEC_DIRTY_* bit
+ * is then cleared in vec->flags.
+ */
+void gl_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt )
+{
+   static const GLfloat clean_value[4] = { 0, 0, 0, 1 };
+   static const GLubyte dirty_bit[4] = {
+      VEC_DIRTY_0, VEC_DIRTY_1, VEC_DIRTY_2, VEC_DIRTY_3
+   };
+   const GLfloat fill = clean_value[elt];
+   GLfloat (*row)[4] = (GLfloat (*)[4]) vec->start;
+   GLfloat (*const end)[4] = row + count;
+
+   /* Rows are walked as tightly packed groups of four floats,
+    * starting at vec->start.
+    */
+   for ( ; row != end ; row++)
+      (*row)[elt] = fill;
+
+   vec->flags &= ~dirty_bit[elt];
+}
+
+/* Flag bits covering the first N columns of a vector, indexed by N
+ * (0..4).  The init/alloc functions in this file only use entry 4.
+ */
+static const GLubyte size_bits[5] = {
+ 0,
+ VEC_SIZE_1,
+ VEC_SIZE_2,
+ VEC_SIZE_3,
+ VEC_SIZE_4,
+};
+
+
+
+/*
+ * Initialize GLvector objects.
+ * Input: v - the vector object to initialize.
+ * flags - bitwise-OR of VEC_* flags
+ * storage - pointer to storage for the vector's data
+ */
+
+
+void gl_vector4f_init( GLvector4f *v, GLuint flags, GLfloat (*storage)[4] )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLfloat *) storage;
+
+   /* Tightly packed rows of four floats, initially empty. */
+   v->stride = sizeof(GLfloat) * 4;
+   v->count = 0;
+   v->size = 2;   /* may change: 2-4 for vertices and 1-4 for texcoords */
+
+   v->flags = flags | size_bits[4] | VEC_GOOD_STRIDE;
+}
+
+void gl_vector3f_init( GLvector3f *v, GLuint flags, GLfloat (*storage)[3] )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLfloat *) storage;
+
+   /* Tightly packed rows of three floats, initially empty. */
+   v->stride = sizeof(GLfloat) * 3;
+   v->count = 0;
+
+   v->flags = VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1f_init( GLvector1f *v, GLuint flags, GLfloat *storage )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLfloat *) storage;
+
+   /* One float per element, initially empty. */
+   v->stride = sizeof(GLfloat);
+   v->count = 0;
+
+   v->flags = VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector4ub_init( GLvector4ub *v, GLuint flags, GLubyte (*storage)[4] )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLubyte *) storage;
+
+   /* Tightly packed rows of four bytes, initially empty. */
+   v->stride = sizeof(GLubyte) * 4;
+   v->count = 0;
+
+   v->flags = VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1ub_init( GLvector1ub *v, GLuint flags, GLubyte *storage )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLubyte *) storage;
+
+   /* One byte per element, initially empty. */
+   v->stride = sizeof(GLubyte);
+   v->count = 0;
+
+   v->flags = VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1ui_init( GLvector1ui *v, GLuint flags, GLuint *storage )
+{
+   /* Both views point at the caller's buffer; nothing is allocated. */
+   v->data = storage;
+   v->start = (GLuint *) storage;
+
+   /* One GLuint per element, initially empty. */
+   v->stride = sizeof(GLuint);
+   v->count = 0;
+
+   v->flags = VEC_GOOD_STRIDE | flags;
+}
+
+
+/*
+ * Initialize GLvector objects and allocate storage.
+ * Input: v - the vector object
+ * flags - bitwise-OR of VEC_* flags
+ * count - number of elements to allocate in vector
+ * alignment - desired memory alignment for the data (in bytes)
+ */
+
+
+void gl_vector4f_alloc( GLvector4f *v, GLuint flags, GLuint count,
+                        GLuint alignment )
+{
+   /* Room for <count> rows of four floats. */
+   void *mem = ALIGN_MALLOC( count * 4 * sizeof(GLfloat), alignment );
+
+   v->storage = mem;
+   v->data = (GLfloat (*)[4]) mem;
+   v->start = (GLfloat *) mem;
+
+   v->stride = sizeof(GLfloat) * 4;
+   v->size = 2;
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = flags | size_bits[4] | VEC_MALLOC | VEC_GOOD_STRIDE;
+}
+
+void gl_vector3f_alloc( GLvector3f *v, GLuint flags, GLuint count,
+                        GLuint alignment )
+{
+   /* Room for <count> rows of three floats. */
+   void *mem = ALIGN_MALLOC( count * 3 * sizeof(GLfloat), alignment );
+
+   v->storage = mem;
+   v->data = (GLfloat (*)[3]) mem;
+   v->start = (GLfloat *) mem;
+
+   v->stride = sizeof(GLfloat) * 3;
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = VEC_MALLOC | VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1f_alloc( GLvector1f *v, GLuint flags, GLuint count,
+                        GLuint alignment )
+{
+   /* Room for <count> floats. */
+   GLfloat *mem = (GLfloat *)
+      ALIGN_MALLOC( count * sizeof(GLfloat), alignment );
+
+   v->start = mem;
+   v->storage = mem;
+   v->data = mem;
+
+   v->stride = sizeof(GLfloat);
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = VEC_MALLOC | VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector4ub_alloc( GLvector4ub *v, GLuint flags, GLuint count,
+                         GLuint alignment )
+{
+   /* Room for <count> rows of four bytes. */
+   void *mem = ALIGN_MALLOC( count * 4 * sizeof(GLubyte), alignment );
+
+   v->storage = mem;
+   v->data = (GLubyte (*)[4]) mem;
+   v->start = (GLubyte *) mem;
+
+   v->stride = sizeof(GLubyte) * 4;
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = VEC_MALLOC | VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1ub_alloc( GLvector1ub *v, GLuint flags, GLuint count,
+                         GLuint alignment )
+{
+   /* Room for <count> bytes. */
+   void *mem = ALIGN_MALLOC( count * sizeof(GLubyte), alignment );
+
+   v->storage = mem;
+   v->data = (GLubyte *) mem;
+   v->start = (GLubyte *) mem;
+
+   v->stride = sizeof(GLubyte);
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = VEC_MALLOC | VEC_GOOD_STRIDE | flags;
+}
+
+void gl_vector1ui_alloc( GLvector1ui *v, GLuint flags, GLuint count,
+                         GLuint alignment )
+{
+   /* Room for <count> GLuints. */
+   void *mem = ALIGN_MALLOC( count * sizeof(GLuint), alignment );
+
+   v->storage = mem;
+   v->data = (GLuint *) mem;
+   v->start = (GLuint *) mem;
+
+   v->stride = sizeof(GLuint);
+   v->count = 0;
+
+   /* VEC_MALLOC records that the matching _free() must release it. */
+   v->flags = VEC_MALLOC | VEC_GOOD_STRIDE | flags;
+}
+
+
+
+/*
+ * Vector deallocation. Free whatever memory is pointed to by the
+ * vector's storage field if the VEC_MALLOC flag is set.
+ * DO NOT free the GLvector object itself, though.
+ */
+
+
+void gl_vector4f_free( GLvector4f *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+void gl_vector3f_free( GLvector3f *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+void gl_vector1f_free( GLvector1f *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+void gl_vector4ub_free( GLvector4ub *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+void gl_vector1ub_free( GLvector1ub *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+void gl_vector1ui_free( GLvector1ui *v )
+{
+   /* Storage that was not allocated by _alloc() is left alone. */
+   if (!(v->flags & VEC_MALLOC))
+      return;
+
+   ALIGN_FREE( v->storage );
+
+   /* Drop every pointer into the released block. */
+   v->storage = NULL;
+   v->start = NULL;
+   v->data = NULL;
+   v->flags &= ~VEC_MALLOC;
+}
+
+
+/*
+ * For debugging: dump a GLvector4f to stdout.
+ *
+ * Prints the elements lying between v->data and v->start, then the
+ * v->count elements from v->start onwards (only those with a non-zero
+ * cullmask[] entry when <culling> is set).  Finally, for every column
+ * >= v->size whose dirty bit is clear in v->flags, checks that the
+ * column really holds the clean value (0, 0, 0, 1) and reports the
+ * first element that does not.
+ */
+void gl_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling )
+{
+   GLfloat c[4] = { 0, 0, 0, 1 };
+   /* One template per vector size.  The element index is a GLuint and
+    * is therefore printed with %u, as in gl_vector3f_print().  Trailing
+    * arguments a template does not consume are ignored by printf.
+    */
+   const char *templates[5] = {
+      "%u:\t0, 0, 0, 1\n",
+      "%u:\t%f, 0, 0, 1\n",
+      "%u:\t%f, %f, 0, 1\n",
+      "%u:\t%f, %f, %f, 1\n",
+      "%u:\t%f, %f, %f, %f\n"
+   };
+
+   const char *t = templates[v->size];
+   GLfloat *d = (GLfloat *)v->data;
+   GLuint j, i = 0, count;
+
+   /* Elements stored ahead of the start pointer. */
+   printf("data-start\n");
+   for ( ; d != v->start ; STRIDE_F(d, v->stride), i++)
+      printf( t, i, d[0], d[1], d[2], d[3]);
+
+   printf("start-count(%u)\n", v->count);
+   count = i + v->count;
+
+   if (culling) {
+      for ( ; i < count ; STRIDE_F(d, v->stride), i++)
+         if (cullmask[i])
+            printf( t, i, d[0], d[1], d[2], d[3]);
+   }
+   else {
+      for ( ; i < count ; STRIDE_F(d, v->stride), i++)
+         printf( t, i, d[0], d[1], d[2], d[3]);
+   }
+
+   /* Columns beyond v->size that are flagged clean must really hold
+    * the clean value in every element, counted from v->data.
+    */
+   for (j = v->size ; j < 4; j++) {
+      if ((v->flags & (1<<j)) == 0) {
+
+         printf("checking col %u is clean as advertised ", j);
+
+         for (i = 0, d = (GLfloat *) v->data ;
+              i < count && d[j] == c[j] ;
+              i++, STRIDE_F(d, v->stride)) {};
+
+         if (i == count)
+            printf(" --> ok\n");
+         else
+            printf(" --> Failed at %u ******\n", i);
+      }
+   }
+}
+
+
+/*
+ * For debugging: dump a GLvector3f to stdout.  The elements between
+ * v->data and v->start come first, followed by v->count elements from
+ * v->start onwards; with <culling> set, only elements whose cullmask[]
+ * entry is non-zero are shown in the second part.
+ */
+void gl_vector3f_print( GLvector3f *v, GLubyte *cullmask, GLboolean culling )
+{
+   static const char row_fmt[] = "%u:\t%f, %f, %f\n";
+   GLfloat *elem = (GLfloat *) v->data;
+   GLuint idx = 0;
+   GLuint end;
+
+   printf("data-start\n");
+   while (elem != v->start) {
+      printf( row_fmt, idx, elem[0], elem[1], elem[2] );
+      STRIDE_F(elem, v->stride);
+      idx++;
+   }
+
+   printf("start-count(%u)\n", v->count);
+   end = idx + v->count;
+
+   for ( ; idx < end ; idx++) {
+      /* cullmask[] is only consulted when culling is requested. */
+      if (!culling || cullmask[idx])
+         printf( row_fmt, idx, elem[0], elem[1], elem[2] );
+      STRIDE_F(elem, v->stride);
+   }
+}
diff --git a/src/mesa/math/m_vector.h b/src/mesa/math/m_vector.h
new file mode 100644
index 0000000000..c4af1eaade
--- /dev/null
+++ b/src/mesa/math/m_vector.h
@@ -0,0 +1,188 @@
+/* $Id: m_vector.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+#ifndef _M_VECTOR_H_
+#define _M_VECTOR_H_
+
+#include "glheader.h"
+
+
+#define VEC_DIRTY_0 0x1 /* dirty flags not really used any more */
+#define VEC_DIRTY_1 0x2
+#define VEC_DIRTY_2 0x4
+#define VEC_DIRTY_3 0x8
+#define VEC_MALLOC 0x10 /* storage field points to self-allocated mem*/
+#define VEC_WRITABLE 0x20 /* keep both + and - bits for easy testing */
+#define VEC_NOT_WRITABLE 0x40
+#define VEC_GOOD_STRIDE 0x80
+#define VEC_BAD_STRIDE 0x100
+
+#define VEC_WRITABLE_FLAGS (VEC_WRITABLE|VEC_NOT_WRITABLE)
+#define VEC_STRIDE_FLAGS (VEC_GOOD_STRIDE|VEC_BAD_STRIDE)
+
+
+#define VEC_SIZE_1 VEC_DIRTY_0
+#define VEC_SIZE_2 (VEC_DIRTY_0|VEC_DIRTY_1)
+#define VEC_SIZE_3 (VEC_DIRTY_0|VEC_DIRTY_1|VEC_DIRTY_2)
+#define VEC_SIZE_4 (VEC_DIRTY_0|VEC_DIRTY_1|VEC_DIRTY_2|VEC_DIRTY_3)
+
+
+
+/* Wrap all the information about vectors up in a struct. Has
+ * additional fields compared to the other vectors to help us keep track of
+ * different vertex sizes, and whether we need to clean columns out
+ * because they contain non-(0,0,0,1) values.
+ *
+ * The start field is used to reserve data for copied vertices at the
+ * end of gl_transform_vb, and avoids the need for a multiplication in
+ * the transformation routines.
+ */
+typedef struct {
+ GLfloat (*data)[4]; /* may be malloc'd or point to client data */
+ GLfloat *start; /* points somewhere inside of <data> */
+ GLuint count; /* size of the vector (in elements) */
+ GLuint stride; /* stride from one element to the next (in bytes) */
+ GLuint size; /* 2-4 for vertices and 1-4 for texcoords */
+ GLuint flags; /* which columns are dirty */
+ void *storage; /* self-allocated storage */
+} GLvector4f;
+
+
+extern void gl_vector4f_init( GLvector4f *v, GLuint flags,
+ GLfloat (*storage)[4] );
+extern void gl_vector4f_alloc( GLvector4f *v, GLuint flags,
+ GLuint count, GLuint alignment );
+extern void gl_vector4f_free( GLvector4f *v );
+extern void gl_vector4f_print( GLvector4f *v, GLubyte *, GLboolean );
+extern void gl_vector4f_clean_elem( GLvector4f *vec, GLuint nr, GLuint elt );
+
+
+/* Could use a single vector type for normals and vertices, but
+ * this way avoids some casts.
+ */
+typedef struct {
+ GLfloat (*data)[3]; /* element array: caller's buffer or <storage> */
+ GLfloat *start; /* set equal to <data> by _init() and _alloc() */
+ GLuint count; /* number of elements; 0 after _init()/_alloc() */
+ GLuint stride; /* bytes from one element to the next */
+ GLuint flags; /* VEC_* flags, e.g. VEC_MALLOC, VEC_GOOD_STRIDE */
+ void *storage; /* block obtained by _alloc(), released by _free() */
+} GLvector3f;
+
+extern void gl_vector3f_init( GLvector3f *v, GLuint flags, GLfloat (*)[3] );
+extern void gl_vector3f_alloc( GLvector3f *v, GLuint flags, GLuint count,
+ GLuint alignment );
+extern void gl_vector3f_free( GLvector3f *v );
+extern void gl_vector3f_print( GLvector3f *v, GLubyte *, GLboolean );
+
+
+/* Vector of single floats. */
+typedef struct {
+ GLfloat *data; /* element array: caller's buffer or <storage> */
+ GLfloat *start; /* set equal to <data> by _init() and _alloc() */
+ GLuint count; /* number of elements; 0 after _init()/_alloc() */
+ GLuint stride; /* bytes from one element to the next */
+ GLuint flags; /* VEC_* flags, e.g. VEC_MALLOC, VEC_GOOD_STRIDE */
+ void *storage; /* block obtained by _alloc(), released by _free() */
+} GLvector1f;
+
+extern void gl_vector1f_free( GLvector1f *v );
+extern void gl_vector1f_init( GLvector1f *v, GLuint flags, GLfloat * );
+extern void gl_vector1f_alloc( GLvector1f *v, GLuint flags, GLuint count,
+ GLuint alignment );
+
+
+/* For 4ub rgba values.
+ */
+typedef struct {
+ GLubyte (*data)[4]; /* element array: caller's buffer or <storage> */
+ GLubyte *start; /* set equal to <data> by _init() and _alloc() */
+ GLuint count; /* number of elements; 0 after _init()/_alloc() */
+ GLuint stride; /* bytes from one element to the next */
+ GLuint flags; /* VEC_* flags, e.g. VEC_MALLOC, VEC_GOOD_STRIDE */
+ void *storage; /* block obtained by _alloc(), released by _free() */
+} GLvector4ub;
+
+extern void gl_vector4ub_init( GLvector4ub *v, GLuint flags,
+ GLubyte (*storage)[4] );
+extern void gl_vector4ub_alloc( GLvector4ub *v, GLuint flags, GLuint count,
+ GLuint alignment );
+extern void gl_vector4ub_free( GLvector4ub * );
+
+
+
+
+/* For 1ub values, eg edgeflag.
+ */
+typedef struct {
+ GLubyte *data; /* element array: caller's buffer or <storage> */
+ GLubyte *start; /* set equal to <data> by _init() and _alloc() */
+ GLuint count; /* number of elements; 0 after _init()/_alloc() */
+ GLuint stride; /* bytes from one element to the next */
+ GLuint flags; /* VEC_* flags, e.g. VEC_MALLOC, VEC_GOOD_STRIDE */
+ void *storage; /* block obtained by _alloc(), released by _free() */
+} GLvector1ub;
+
+extern void gl_vector1ub_init( GLvector1ub *v, GLuint flags, GLubyte *storage);
+extern void gl_vector1ub_alloc( GLvector1ub *v, GLuint flags, GLuint count,
+ GLuint alignment );
+extern void gl_vector1ub_free( GLvector1ub * );
+
+
+
+
+/* For, eg Index, Array element.
+ */
+typedef struct {
+ GLuint *data; /* element array: caller's buffer or <storage> */
+ GLuint *start; /* set equal to <data> by _init() and _alloc() */
+ GLuint count; /* number of elements; 0 after _init()/_alloc() */
+ GLuint stride; /* bytes from one element to the next */
+ GLuint flags; /* VEC_* flags, e.g. VEC_MALLOC, VEC_GOOD_STRIDE */
+ void *storage; /* block obtained by _alloc(), released by _free() */
+} GLvector1ui;
+
+extern void gl_vector1ui_init( GLvector1ui *v, GLuint flags, GLuint *storage );
+extern void gl_vector1ui_alloc( GLvector1ui *v, GLuint flags, GLuint count,
+ GLuint alignment );
+extern void gl_vector1ui_free( GLvector1ui * );
+
+
+
+/*
+ * Given vector <v>, return a pointer (cast to <type *>) to the <i>-th element.
+ *
+ * End up doing a lot of slow imuls if not careful.
+ */
+#define VEC_ELT( v, type, i ) \
+ ( (type *) ( ((GLbyte *) ((v)->data)) + (i) * (v)->stride) )
+
+
+#endif
diff --git a/src/mesa/math/m_xform.c b/src/mesa/math/m_xform.c
new file mode 100644
index 0000000000..6bc6a9cea3
--- /dev/null
+++ b/src/mesa/math/m_xform.c
@@ -0,0 +1,251 @@
+/* $Id: m_xform.c,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Matrix/vertex/vector transformation stuff
+ *
+ *
+ * NOTES:
+ * 1. 4x4 transformation matrices are stored in memory in column major order.
+ * 2. Points/vertices are to be thought of as column vectors.
+ * 3. Transformation of a point p by a matrix M is: p' = M * p
+ */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "mmath.h"
+
+#include "m_matrix.h"
+#include "m_xform.h"
+
+
+#ifdef DEBUG
+#include "m_debug_xform.h"
+#endif
+
+#ifdef USE_X86_ASM
+#include "X86/common_x86_asm.h"
+#endif
+
+/* Dispatch tables, filled in by _math_init_transformation() and the
+ * init_*() helpers it calls.
+ *
+ * gl_transform_tab[0] points at raw_transform_tab and
+ * gl_transform_tab[1] at cull_transform_tab; the Transform() macros in
+ * m_xform.h then index the selected table by vector size and by
+ * matrix type.
+ */
+clip_func gl_clip_tab[5];
+dotprod_func gl_dotprod_tab[2][5];
+vec_copy_func gl_copy_tab[2][0x10];
+normal_func gl_normal_tab[0xf][0x4];
+transform_func **(gl_transform_tab[2]);
+static transform_func *cull_transform_tab[5];
+static transform_func *raw_transform_tab[5];
+
+
+/* Raw data format used for:
+ * - Object-to-eye transform prior to culling, although this too
+ * could be culled under some circumstances.
+ * - Eye-to-clip transform (via the function above).
+ * - Cliptesting
+ * - And everything else too, if culling happens to be disabled.
+ */
+#define TAG(x) x##_raw
+#define TAG2(x,y) x##y##_raw
+#define IDX 0
+#define STRIDE_LOOP for (i=0;i<count;i++, STRIDE_F(from, stride))
+#define LOOP for (i=0;i<n;i++)
+#define CULL_CHECK
+#define CLIP_CHECK
+#define ARGS
+#include "m_xform_tmp.h"
+#include "m_clip_tmp.h"
+#include "m_norm_tmp.h"
+#include "m_dotprod_tmp.h"
+#include "m_copy_tmp.h"
+#undef TAG
+#undef TAG2
+#undef LOOP
+#undef CULL_CHECK
+#undef CLIP_CHECK
+#undef ARGS
+#undef IDX
+
+/* Culled data used for:
+ * - texture transformations
+ * - viewport map transformation
+ * - normal transformations prior to lighting
+ * - user cliptests
+ */
+#define TAG(x) x##_masked
+#define TAG2(x,y) x##y##_masked
+#define IDX 1
+#define STRIDE_LOOP for (i=0;i<count;i++, STRIDE_F(from, stride))
+#define LOOP for (i=0;i<n;i++)
+#define CULL_CHECK if (mask[i])
+#define CLIP_CHECK if ((mask[i] & flag) == 0)
+#define ARGS , const GLubyte mask[]
+#include "m_xform_tmp.h"
+#include "m_norm_tmp.h"
+#include "m_dotprod_tmp.h"
+#include "m_copy_tmp.h"
+#undef TAG
+#undef TAG2
+#undef LOOP
+#undef CULL_CHECK
+#undef CLIP_CHECK
+#undef ARGS
+#undef IDX
+
+
+
+
+
+
+/*
+ * Perspective-divide every element of <clip_vec> into <proj_vec>: each
+ * output row is (x/w, y/w, z/w, 1/w).  The source is walked with its
+ * own byte stride; the destination is written as packed rows of four
+ * floats starting at proj_vec->start.  Returns <proj_vec>.
+ */
+GLvector4f *gl_project_points( GLvector4f *proj_vec,
+                               const GLvector4f *clip_vec )
+{
+   const GLuint n = clip_vec->count;
+   const GLuint src_stride = clip_vec->stride;
+   const GLfloat *src = (GLfloat *) clip_vec->start;
+   GLfloat (*dst)[4] = (GLfloat (*)[4]) proj_vec->start;
+   GLuint k = 0;
+
+   while (k < n) {
+      const GLfloat inv_w = 1.0F / src[3];
+      GLfloat *out = dst[k];
+
+      out[3] = inv_w;
+      out[0] = src[0] * inv_w;
+      out[1] = src[1] * inv_w;
+      out[2] = src[2] * inv_w;
+
+      STRIDE_F(src, src_stride);
+      k++;
+   }
+
+   proj_vec->count = clip_vec->count;
+   proj_vec->size = 3;
+   proj_vec->flags |= VEC_SIZE_4;
+   return proj_vec;
+}
+
+
+
+
+
+
+/*
+ * Transform a 4-element row vector (1x4 matrix) by a 4x4 matrix. This
+ * function is used for transforming clipping plane equations and spotlight
+ * directions.
+ * Mathematically, u = v * m.
+ * Input: v - input vector
+ * m - transformation matrix
+ * Output: u - transformed vector
+ */
+void gl_transform_vector( GLfloat u[4], const GLfloat v[4], const GLfloat m[16] )
+{
+   /* v is read in full before anything is stored to u. */
+   const GLfloat x = v[0], y = v[1], z = v[2], w = v[3];
+   GLuint col;
+
+   /* u[col] is the dot product of v with the four consecutive matrix
+    * entries m[4*col] .. m[4*col+3].
+    */
+   for (col = 0 ; col < 4 ; col++) {
+      const GLfloat *c = m + 4 * col;
+      u[col] = x * c[0] + y * c[1] + z * c[2] + w * c[3];
+   }
+}
+
+
+/* Useful for one-off point transformations, as in clipping.
+ * Note that because the matrix isn't analyzed we do too many
+ * multiplies, and that the result is always 4-clean.
+ */
+void gl_transform_point_sz( GLfloat Q[4], const GLfloat M[16],
+                            const GLfloat P[4], GLuint sz )
+{
+   GLuint r;
+
+   /* Transforming in place is not supported: Q is left untouched. */
+   if (Q == P)
+      return;
+
+   /* Q = M * P, where the components of P beyond <sz> are taken to be
+    * 0 (y, z) and 1 (w).  Any other <sz> leaves Q unchanged.
+    */
+   switch (sz) {
+   case 4:
+      for (r = 0 ; r < 4 ; r++)
+         Q[r] = M[r] * P[0] + M[r + 4] * P[1] + M[r + 8] * P[2] + M[r + 12] * P[3];
+      break;
+   case 3:
+      for (r = 0 ; r < 4 ; r++)
+         Q[r] = M[r] * P[0] + M[r + 4] * P[1] + M[r + 8] * P[2] + M[r + 12];
+      break;
+   case 2:
+      for (r = 0 ; r < 4 ; r++)
+         Q[r] = M[r] * P[0] + M[r + 4] * P[1] + M[r + 12];
+      break;
+   case 1:
+      for (r = 0 ; r < 4 ; r++)
+         Q[r] = M[r] * P[0] + M[r + 12];
+      break;
+   default:
+      break;
+   }
+}
+
+
+/*
+ * This is called only once. It initializes several tables with pointers
+ * to optimized transformation functions. This is where we can test for
+ * AMD 3Dnow! capability, Intel Katmai, etc. and hook in the right code.
+ */
+void
+_math_init_transformation( void )
+{
+ /* Slot 0 selects the unmasked ("raw") table, slot 1 the masked one. */
+ gl_transform_tab[0] = raw_transform_tab;
+ gl_transform_tab[1] = cull_transform_tab;
+
+ /* Init helpers coming from the two template instantiations earlier in
+ * this file.  Only a raw cliptest variant exists because
+ * m_clip_tmp.h is included in the raw instantiation alone.
+ */
+ init_c_transformations_raw();
+ init_c_transformations_masked();
+ init_c_norm_transform_raw();
+ init_c_norm_transform_masked();
+ init_c_cliptest_raw();
+ init_copy0_raw();
+ init_copy0_masked();
+ init_dotprod_raw();
+ init_dotprod_masked();
+
+ /* In DEBUG builds the self-tests run here, i.e. before the x86
+ * initialisation below is called.
+ */
+#ifdef DEBUG
+ gl_test_all_transform_functions( "default" );
+ gl_test_all_normal_transform_functions( "default" );
+#endif
+
+#ifdef USE_X86_ASM
+ gl_init_all_x86_transform_asm();
+#endif
+}
+
+/* Initialise the math module: transformation tables first, then the
+ * translation tables, then _math_init_vertices().
+ * NOTE(review): no prototype for _math_init_translate() or
+ * _math_init_vertices() is visible in the headers included by this
+ * file -- confirm where they are declared.
+ */
+void
+_math_init( void )
+{
+ _math_init_transformation();
+ _math_init_translate();
+ _math_init_vertices();
+}
diff --git a/src/mesa/math/m_xform.h b/src/mesa/math/m_xform.h
new file mode 100644
index 0000000000..1c6ac461e8
--- /dev/null
+++ b/src/mesa/math/m_xform.h
@@ -0,0 +1,224 @@
+/* $Id: m_xform.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.3
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+
+
+#ifndef _M_XFORM_H
+#define _M_XFORM_H
+
+
+#include "glheader.h"
+#include "config.h"
+#include "math/m_vector.h"
+#include "math/m_matrix.h"
+
+#ifdef USE_X86_ASM
+#define _XFORMAPI _ASMAPI
+#define _XFORMAPIP _ASMAPIP
+#else
+#define _XFORMAPI
+#define _XFORMAPIP *
+#endif
+
+/*
+ * Transform a point (column vector) by a matrix: Q = M * P
+ *
+ * TRANSFORM_POINT3 is the same with P[3] taken to be 1.
+ *
+ * NOTE: both macros expand to four separate statements and already end
+ * in a semicolon, so they must not be used as the unbraced body of an
+ * if/else/loop.  Q, M and P are pasted in unparenthesized and evaluated
+ * several times: pass plain array names or pointers, without side effects.
+ */
+#define TRANSFORM_POINT( Q, M, P ) \
+ Q[0] = M[0] * P[0] + M[4] * P[1] + M[8] * P[2] + M[12] * P[3]; \
+ Q[1] = M[1] * P[0] + M[5] * P[1] + M[9] * P[2] + M[13] * P[3]; \
+ Q[2] = M[2] * P[0] + M[6] * P[1] + M[10] * P[2] + M[14] * P[3]; \
+ Q[3] = M[3] * P[0] + M[7] * P[1] + M[11] * P[2] + M[15] * P[3];
+
+
+#define TRANSFORM_POINT3( Q, M, P ) \
+ Q[0] = M[0] * P[0] + M[4] * P[1] + M[8] * P[2] + M[12]; \
+ Q[1] = M[1] * P[0] + M[5] * P[1] + M[9] * P[2] + M[13]; \
+ Q[2] = M[2] * P[0] + M[6] * P[1] + M[10] * P[2] + M[14]; \
+ Q[3] = M[3] * P[0] + M[7] * P[1] + M[11] * P[2] + M[15];
+
+
+/*
+ * Transform a normal (row vector) by a matrix: [NX NY NZ] = N * MAT
+ *
+ * Only the upper-left 3x3 part of MAT is read.  TO, N and MAT are
+ * pasted in unparenthesized and evaluated several times: pass plain
+ * array names or pointers.  TO must not alias N, because TO[0] is
+ * stored before N[0] is read again.
+ */
+#define TRANSFORM_NORMAL( TO, N, MAT ) \
+do { \
+ TO[0] = N[0] * MAT[0] + N[1] * MAT[1] + N[2] * MAT[2]; \
+ TO[1] = N[0] * MAT[4] + N[1] * MAT[5] + N[2] * MAT[6]; \
+ TO[2] = N[0] * MAT[8] + N[1] * MAT[9] + N[2] * MAT[10]; \
+} while (0)
+
+
+extern void gl_transform_vector( GLfloat u[4],
+ const GLfloat v[4],
+ const GLfloat m[16] );
+
+
+extern void gl_init_transformation( void );
+
+/* Table initialisation entry point defined in m_xform.c. */
+extern void _math_init_transformation( void );
+
+
+/* KW: Clip functions now do projective divide as well. The projected
+ * coordinates are very useful to us because they let us cull
+ * backfaces and eliminate vertices from lighting, fogging, etc
+ * calculations. Despite the fact that this divide could be done one
+ * day in hardware, we would still have a reason to want to do it here
+ * as long as those other calculations remain in software.
+ *
+ * Clipping is a convenient place to do the divide on x86 as it should be
+ * possible to overlap with integer outcode calculations.
+ *
+ * There are two cases where we wouldn't want to do the divide in cliptest:
+ * - When we aren't clipping. We still might want to cull backfaces
+ * so the divide should be done elsewhere. This currently never
+ * happens.
+ *
+ * - When culling isn't likely to help us, such as when the GL culling
+ * is disabled and we are not lighting or are only lighting
+ * one-sided. In this situation, backface determination provides
+ * us with no useful information. A tricky case to detect is when
+ * all input data is already culled, although hopefully the
+ * application wouldn't turn on culling in such cases.
+ *
+ * We supply a buffer to hold the [x/w,y/w,z/w,1/w] values which
+ * are the result of the projection. This is only used in the
+ * 4-vector case - in other cases, we just use the clip coordinates
+ * as the projected coordinates - they are identical.
+ *
+ * This is doubly convenient because it means the Win[] array is now
+ * of the same stride as all the others, so I can now turn map_vertices
+ * into a straight-forward matrix transformation, with asm acceleration
+ * automatically available.
+ */
+
+/* Vertex buffer clipping flags
+ */
+#define CLIP_RIGHT_SHIFT 0
+#define CLIP_LEFT_SHIFT 1
+#define CLIP_TOP_SHIFT 2
+#define CLIP_BOTTOM_SHIFT 3
+#define CLIP_NEAR_SHIFT 4
+#define CLIP_FAR_SHIFT 5
+
+#define CLIP_RIGHT_BIT 0x01
+#define CLIP_LEFT_BIT 0x02
+#define CLIP_TOP_BIT 0x04
+#define CLIP_BOTTOM_BIT 0x08
+#define CLIP_NEAR_BIT 0x10
+#define CLIP_FAR_BIT 0x20
+#define CLIP_USER_BIT 0x40
+#define CLIP_CULLED_BIT 0x80 /* Vertex has been culled */
+#define CLIP_ALL_BITS 0x3f
+
+
+/* Function-pointer types for the entries of gl_clip_tab[],
+ * gl_dotprod_tab[][] and gl_copy_tab[][] respectively.  clip_func is
+ * declared through _XFORMAPIP so that it picks up the assembly calling
+ * convention when USE_X86_ASM is defined.
+ * NOTE(review): the exact meaning of orMask/andMask and of <elt> is not
+ * visible here -- see m_clip_tmp.h and m_dotprod_tmp.h.
+ */
+typedef GLvector4f * (_XFORMAPIP clip_func)( GLvector4f *vClip,
+ GLvector4f *vProj,
+ GLubyte clipMask[],
+ GLubyte *orMask,
+ GLubyte *andMask );
+
+typedef void (*dotprod_func)( GLvector4f *out_vec,
+ GLuint elt,
+ const GLvector4f *coord_vec,
+ const GLfloat plane[4],
+ const GLubyte mask[]);
+
+typedef void (*vec_copy_func)( GLvector4f *to,
+ const GLvector4f *from,
+ const GLubyte mask[]);
+
+
+
+/*
+ * Functions for transformation of normals in the VB.
+ */
+typedef void (_NORMAPIP normal_func)( const GLmatrix *mat,
+ GLfloat scale,
+ const GLvector3f *in,
+ const GLfloat lengths[],
+ const GLubyte mask[],
+ GLvector3f *dest );
+
+
+/* Flags for selecting a normal transformation function.
+ */
+#define NORM_RESCALE 0x1 /* apply the scale factor */
+#define NORM_NORMALIZE 0x2 /* normalize */
+#define NORM_TRANSFORM 0x4 /* apply the transformation matrix */
+#define NORM_TRANSFORM_NO_ROT 0x8 /* apply the transformation matrix */
+
+
+
+
+/* KW: New versions of the transform function allow a mask array
+ * specifying that individual vector transform should be skipped
+ * when the mask byte is zero. This is always present as a
+ * parameter, to allow a unified interface.
+ */
+typedef void (_XFORMAPIP transform_func)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *clipmask,
+ const GLubyte flag );
+
+
+extern GLvector4f *gl_project_points( GLvector4f *to,
+ const GLvector4f *from );
+
+extern void gl_transform_bounds3( GLubyte *orMask, GLubyte *andMask,
+ const GLfloat m[16],
+ CONST GLfloat src[][3] );
+
+extern void gl_transform_bounds2( GLubyte *orMask, GLubyte *andMask,
+ const GLfloat m[16],
+ CONST GLfloat src[][3] );
+
+
+extern dotprod_func gl_dotprod_tab[2][5];
+extern vec_copy_func gl_copy_tab[2][0x10];
+extern clip_func gl_clip_tab[5];
+extern normal_func gl_normal_tab[0xf][0x4];
+
+/* Use of 3 layers of linked 1-dimensional arrays to reduce
+ * cost of lookup.
+ */
+extern transform_func **(gl_transform_tab[2]);
+
+
+extern void gl_transform_point_sz( GLfloat Q[4], const GLfloat M[16],
+ const GLfloat P[4], GLuint sz );
+
+
+/* Look the transform function up in gl_transform_tab by
+ * [masked?][vector size][matrix type], call it, and yield <to>.
+ * TransformRaw always uses the unmasked table and passes no mask.
+ *
+ * Every argument is parenthesized so that compound expressions keep
+ * their meaning; in particular a <cull> such as (a & b) is compared
+ * against 0 as a whole.  Arguments may be evaluated more than once.
+ */
+#define TransformRaw( to, mat, from ) \
+   ( (*gl_transform_tab[0][(from)->size][(mat)->type])( (to), (mat)->m, (from), 0, 0 ), \
+     (to) )
+
+#define Transform( to, mat, from, mask, cull ) \
+   ( (*gl_transform_tab[(cull)!=0][(from)->size][(mat)->type])( (to), (mat)->m, (from), (mask), (cull) ), \
+     (to) )
+
+
+#endif
diff --git a/src/mesa/math/m_xform_tmp.h b/src/mesa/math/m_xform_tmp.h
new file mode 100644
index 0000000000..289255a9cf
--- /dev/null
+++ b/src/mesa/math/m_xform_tmp.h
@@ -0,0 +1,974 @@
+/* $Id: m_xform_tmp.h,v 1.1 2000/11/16 21:05:41 keithw Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * New (3.1) transformation code written by Keith Whitwell.
+ */
+
+
+/*----------------------------------------------------------------------
+ * Begin Keith's new code
+ *
+ *----------------------------------------------------------------------
+ */
+
+/* KW: Fixed stride, now measured in bytes as is the OpenGL array stride.
+ */
+
+/* KW: These are now parameterized to produce two versions, one
+ * which transforms all incoming points, and a second which
+ * takes notice of a cullmask array, and only transforms
+ * unculled vertices.
+ */
+
+/* KW: 1-vectors can sneak into the texture pipeline via the array
+ * interface. These functions are here because I want consistent
+ * treatment of the vertex sizes and a lazy strategy for
+ * cleaning unused parts of the vector, and so as not to exclude
+ * them from the vertex array interface.
+ *
+ * Under our current analysis of matrices, there is no way that
+ * the product of a matrix and a 1-vector can remain a 1-vector,
+ * with the exception of the identity transform.
+ */
+
+/* KW: No longer zero-pad outgoing vectors. Now that external
+ * vectors can get into the pipeline we cannot ever assume
+ * that there is more to a vector than indicated by its
+ * size.
+ */
+
+/* KW: Now uses clipmask and a flag to allow us to skip both/either
+ * clipped and/or culled vertices.
+ */
+
+/* 1-component points through a general matrix.
+ *
+ * For each source x = from[0] this writes all four outputs as
+ * to[k] = m[k] * x + m[12 + k], i.e. the matrix applied to (x, 0, 0, 1).
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_general)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m12 = m[12];
+ const GLfloat m1 = m[1], m13 = m[13];
+ const GLfloat m2 = m[2], m14 = m[14];
+ const GLfloat m3 = m[3], m15 = m[15];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m1 * ox + m13;
+ to[i][2] = m2 * ox + m14;
+ to[i][3] = m3 * ox + m15;
+ }
+ }
+
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 1-component points through the identity: a plain copy.
+ *
+ * Copies from[0] into to[i][0]; the matrix argument is not read.  When
+ * to_vec and from_vec are the same object the function returns at once
+ * without touching size, flags or count.  Otherwise to_vec is marked as
+ * a 1-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_identity)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLuint count = from_vec->count;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ /* In-place identity transform: nothing to copy or update. */
+ if (to_vec == from_vec) return;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ to[i][0] = from[0];
+ }
+ }
+
+ to_vec->size = 1;
+ to_vec->flags |= VEC_SIZE_1;
+ to_vec->count = from_vec->count;
+}
+
+/* 1-component points through a 2D matrix.
+ *
+ * For each source x = from[0] this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[1] * x + m[13]
+ * and leaves to[2] and to[3] untouched.  Afterwards to_vec is marked as
+ * a 2-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_2d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1];
+ const GLfloat m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m1 * ox + m13;
+ }
+ }
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ to_vec->count = from_vec->count;
+}
+
+/* 1-component points through a 2D matrix without rotation.
+ *
+ * For each source x = from[0] this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[13]            (constant: there is no source y)
+ * and leaves to[2] and to[3] untouched.  Afterwards to_vec is marked as
+ * a 2-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_2d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m13;
+ }
+ }
+
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ to_vec->count = from_vec->count;
+}
+
+/* 1-component points through a 3D matrix.
+ *
+ * For each source x = from[0] this writes
+ *   to[k] = m[k] * x + m[12 + k]   for k = 0, 1, 2
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_3d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m2 = m[2];
+ const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m1 * ox + m13;
+ to[i][2] = m2 * ox + m14;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+
+/* 1-component points through a 3D matrix without rotation.
+ *
+ * For each source x = from[0] this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[13]            (constant: there is no source y)
+ *   to[2] = m[14]            (constant: there is no source z)
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_3d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0];
+ const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m13;
+ to[i][2] = m14;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* 1-component points through a perspective matrix.
+ *
+ * For each source x = from[0] this writes (m[0] * x, 0, m[14], 0).
+ * Only m[0] and m[14] are read.  Afterwards to_vec is marked as a
+ * 4-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points1_perspective)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0];
+ to[i][0] = m0 * ox ;
+ to[i][1] = 0 ;
+ to[i][2] = m14;
+ to[i][3] = 0;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+
+
+
+/* 2-vectors, which are a lot more relevant than 1-vectors, are
+ * present early in the geometry pipeline and throughout the
+ * texture pipeline.
+ *
+ * This variant handles a general matrix.  For each source
+ * (x, y) = (from[0], from[1]) it writes all four outputs as
+ *   to[k] = m[k] * x + m[4 + k] * y + m[12 + k]
+ * i.e. the matrix applied to (x, y, 0, 1).  Afterwards to_vec is marked
+ * as a 4-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_general)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m4 = m[4], m12 = m[12];
+ const GLfloat m1 = m[1], m5 = m[5], m13 = m[13];
+ const GLfloat m2 = m[2], m6 = m[6], m14 = m[14];
+ const GLfloat m3 = m[3], m7 = m[7], m15 = m[15];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox + m4 * oy + m12;
+ to[i][1] = m1 * ox + m5 * oy + m13;
+ to[i][2] = m2 * ox + m6 * oy + m14;
+ to[i][3] = m3 * ox + m7 * oy + m15;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 2-component points through the identity: a plain copy.
+ *
+ * Copies from[0..1] into to[i][0..1]; the matrix argument is not read.
+ * When to_vec and from_vec are the same object the function returns at
+ * once without touching size, flags or count.  Otherwise to_vec is
+ * marked as a 2-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_identity)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ /* In-place identity transform: nothing to copy or update. */
+ if (to_vec == from_vec) return;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ to[i][0] = from[0];
+ to[i][1] = from[1];
+ }
+ }
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ to_vec->count = from_vec->count;
+}
+
+/* 2-component points through a 2D matrix.
+ *
+ * For each source (x, y) = (from[0], from[1]) this writes
+ *   to[0] = m[0] * x + m[4] * y + m[12]
+ *   to[1] = m[1] * x + m[5] * y + m[13]
+ * and leaves to[2] and to[3] untouched.  Afterwards to_vec is marked as
+ * a 2-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_2d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
+ const GLfloat m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox + m4 * oy + m12;
+ to[i][1] = m1 * ox + m5 * oy + m13;
+ }
+ }
+
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ to_vec->count = from_vec->count;
+}
+
+/* 2-component points through a 2D matrix without rotation.
+ *
+ * For each source (x, y) = (from[0], from[1]) this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[5] * y + m[13]
+ * and leaves to[2] and to[3] untouched.  Afterwards to_vec is marked as
+ * a 2-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_2d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m5 * oy + m13;
+ }
+ }
+
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ to_vec->count = from_vec->count;
+}
+
+/* 2-component points through a 3D matrix.
+ *
+ * For each source (x, y) = (from[0], from[1]) this writes
+ *   to[k] = m[k] * x + m[4 + k] * y + m[12 + k]   for k = 0, 1, 2
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_3d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
+ const GLfloat m6 = m[6], m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox + m4 * oy + m12;
+ to[i][1] = m1 * ox + m5 * oy + m13;
+ to[i][2] = m2 * ox + m6 * oy + m14;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+
+/* I would actually say this was a fairly important function, from
+ * a texture transformation point of view.
+ *
+ * For each source (x, y) = (from[0], from[1]) this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[5] * y + m[13]
+ *   to[2] = m[14]            (constant: there is no source z)
+ * and leaves to[3] untouched.  The result is reported as a 2-component
+ * vector when m[14] is exactly zero and as a 3-component vector
+ * otherwise; to[2] is written in both cases.  The count is copied from
+ * from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_3d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5];
+ const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox + m12;
+ to[i][1] = m5 * oy + m13;
+ to[i][2] = m14;
+ }
+ }
+ /* With no z translation every output z is zero, so report size 2. */
+ if (m14 == 0) {
+ to_vec->size = 2;
+ to_vec->flags |= VEC_SIZE_2;
+ } else {
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ }
+ to_vec->count = from_vec->count;
+}
+
+/* This may not be called too often, but I wouldn't say it was dead
+ * code. It's also hard to remove any of these functions if you are
+ * attached to the assertions that have appeared in them.
+ *
+ * For each source (x, y) = (from[0], from[1]) this writes
+ * (m[0] * x, m[5] * y, m[14], 0).  Only m[0], m[5] and m[14] are read.
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points2_perspective)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1];
+ to[i][0] = m0 * ox ;
+ to[i][1] = m5 * oy ;
+ to[i][2] = m14;
+ to[i][3] = 0;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+
+
+/* 3-component points through a general matrix.
+ *
+ * For each source (x, y, z) = from[0..2] this writes all four outputs as
+ *   to[k] = m[k] * x + m[4 + k] * y + m[8 + k] * z + m[12 + k]
+ * i.e. the matrix applied to (x, y, z, 1).  Afterwards to_vec is marked
+ * as a 4-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_general)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
+ const GLfloat m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
+ const GLfloat m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
+ const GLfloat m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m4 * oy + m8 * oz + m12;
+ to[i][1] = m1 * ox + m5 * oy + m9 * oz + m13;
+ to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14;
+ to[i][3] = m3 * ox + m7 * oy + m11 * oz + m15;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 3-component points through the identity: a plain copy.
+ *
+ * Copies from[0..2] into to[i][0..2]; the matrix argument is not read.
+ * When to_vec and from_vec are the same object the function returns at
+ * once without touching size, flags or count.  Otherwise to_vec is
+ * marked as a 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_identity)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ /* In-place identity transform: nothing to copy or update. */
+ if (to_vec == from_vec) return;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ to[i][0] = from[0];
+ to[i][1] = from[1];
+ to[i][2] = from[2];
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* 3-component points through a 2D matrix.
+ *
+ * For each source (x, y, z) = from[0..2] this writes
+ *   to[0] = m[0] * x + m[4] * y + m[12]
+ *   to[1] = m[1] * x + m[5] * y + m[13]
+ *   to[2] = z                (passed through unchanged)
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_2d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
+ const GLfloat m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m4 * oy + m12 ;
+ to[i][1] = m1 * ox + m5 * oy + m13 ;
+ to[i][2] = + oz ;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* 3-component points through a 2D matrix without rotation.
+ *
+ * For each source (x, y, z) = from[0..2] this writes
+ *   to[0] = m[0] * x + m[12]
+ *   to[1] = m[5] * y + m[13]
+ *   to[2] = z                (passed through unchanged)
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_2d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m12 ;
+ to[i][1] = m5 * oy + m13 ;
+ to[i][2] = + oz ;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* 3-component points through a 3D matrix.
+ *
+ * For each source (x, y, z) = from[0..2] this writes
+ *   to[k] = m[k] * x + m[4 + k] * y + m[8 + k] * z + m[12 + k]
+ * for k = 0, 1, 2 and leaves to[3] untouched.  Afterwards to_vec is
+ * marked as a 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_3d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
+ const GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
+ const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m4 * oy + m8 * oz + m12 ;
+ to[i][1] = m1 * ox + m5 * oy + m9 * oz + m13 ;
+ to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 ;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* previously known as ortho...
+ *
+ * 3-component points through a 3D matrix without rotation.  For each
+ * source (x, y, z) = from[0..2] this writes
+ *   to[0] = m[0]  * x + m[12]
+ *   to[1] = m[5]  * y + m[13]
+ *   to[2] = m[10] * z + m[14]
+ * and leaves to[3] untouched.  Afterwards to_vec is marked as a
+ * 3-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_3d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5];
+ const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m12 ;
+ to[i][1] = m5 * oy + m13 ;
+ to[i][2] = m10 * oz + m14 ;
+ }
+ }
+ to_vec->size = 3;
+ to_vec->flags |= VEC_SIZE_3;
+ to_vec->count = from_vec->count;
+}
+
+/* 3-component points through a perspective matrix.
+ *
+ * For each source (x, y, z) = from[0..2] this writes
+ *   to[0] = m[0]  * x + m[8] * z
+ *   to[1] = m[5]  * y + m[9] * z
+ *   to[2] = m[10] * z + m[14]
+ *   to[3] = -z
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points3_perspective)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
+ const GLfloat m10 = m[10], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2];
+ to[i][0] = m0 * ox + m8 * oz ;
+ to[i][1] = m5 * oy + m9 * oz ;
+ to[i][2] = m10 * oz + m14 ;
+ to[i][3] = -oz ;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+
+
+/* 4-component points through a general matrix.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes the full
+ * matrix-vector product:
+ *   to[k] = m[k] * x + m[4 + k] * y + m[8 + k] * z + m[12 + k] * w
+ * for k = 0..3.  Afterwards to_vec is marked as a 4-component vector
+ * with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_general)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
+ const GLfloat m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
+ const GLfloat m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
+ const GLfloat m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m4 * oy + m8 * oz + m12 * ow;
+ to[i][1] = m1 * ox + m5 * oy + m9 * oz + m13 * ow;
+ to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
+ to[i][3] = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through the identity: a plain copy.
+ *
+ * Copies from[0..3] into to[i][0..3]; the matrix argument is not read.
+ * When to_vec and from_vec are the same object the function returns at
+ * once without touching size, flags or count.  Otherwise to_vec is
+ * marked as a 4-component vector with the same count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_identity)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ /* In-place identity transform: nothing to copy or update. */
+ if (to_vec == from_vec) return;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ to[i][0] = from[0];
+ to[i][1] = from[1];
+ to[i][2] = from[2];
+ to[i][3] = from[3];
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through a 2D matrix.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes
+ *   to[0] = m[0] * x + m[4] * y + m[12] * w
+ *   to[1] = m[1] * x + m[5] * y + m[13] * w
+ *   to[2] = z                (passed through unchanged)
+ *   to[3] = w                (passed through unchanged)
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_2d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
+ const GLfloat m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m4 * oy + m12 * ow;
+ to[i][1] = m1 * ox + m5 * oy + m13 * ow;
+ to[i][2] = + oz ;
+ to[i][3] = ow;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through a 2D matrix without rotation.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes
+ *   to[0] = m[0] * x + m[12] * w
+ *   to[1] = m[5] * y + m[13] * w
+ *   to[2] = z                (passed through unchanged)
+ *   to[3] = w                (passed through unchanged)
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_2d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m12 * ow;
+ to[i][1] = m5 * oy + m13 * ow;
+ to[i][2] = + oz ;
+ to[i][3] = ow;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through a 3D matrix.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes
+ *   to[k] = m[k] * x + m[4 + k] * y + m[8 + k] * z + m[12 + k] * w
+ * for k = 0, 1, 2, and passes w through unchanged as to[3].
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_3d)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
+ const GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
+ const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m4 * oy + m8 * oz + m12 * ow;
+ to[i][1] = m1 * ox + m5 * oy + m9 * oz + m13 * ow;
+ to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
+ to[i][3] = ow;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through a 3D matrix without rotation.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes
+ *   to[0] = m[0]  * x + m[12] * w
+ *   to[1] = m[5]  * y + m[13] * w
+ *   to[2] = m[10] * z + m[14] * w
+ *   to[3] = w                (passed through unchanged)
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_3d_no_rot)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5];
+ const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m12 * ow;
+ to[i][1] = m5 * oy + m13 * ow;
+ to[i][2] = m10 * oz + m14 * ow;
+ to[i][3] = ow;
+ }
+ }
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* 4-component points through a perspective matrix.
+ *
+ * For each source (x, y, z, w) = from[0..3] this writes
+ *   to[0] = m[0]  * x + m[8] * z
+ *   to[1] = m[5]  * y + m[9] * z
+ *   to[2] = m[10] * z + m[14] * w
+ *   to[3] = -z
+ * Afterwards to_vec is marked as a 4-component vector with the same
+ * count as from_vec.
+ *
+ * NOTE(review): STRIDE_LOOP and CLIP_CHECK are not defined in this
+ * template; presumably they advance i/from using stride and count and
+ * test mask/flag -- confirm where the template is instantiated.
+ */
+static void _XFORMAPI
+TAG(transform_points4_perspective)( GLvector4f *to_vec,
+ const GLfloat m[16],
+ const GLvector4f *from_vec,
+ const GLubyte *mask,
+ const GLubyte flag )
+{
+ const GLuint stride = from_vec->stride;
+ GLfloat *from = from_vec->start;
+ GLfloat (*to)[4] = (GLfloat (*)[4])to_vec->start;
+ GLuint count = from_vec->count;
+ const GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
+ const GLfloat m10 = m[10], m14 = m[14];
+ GLuint i;
+ (void) mask;
+ (void) flag;
+ STRIDE_LOOP {
+ CLIP_CHECK {
+ const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
+ to[i][0] = m0 * ox + m8 * oz ;
+ to[i][1] = m5 * oy + m9 * oz ;
+ to[i][2] = m10 * oz + m14 * ow ;
+ to[i][3] = -oz ;
+ }
+ }
+
+ to_vec->size = 4;
+ to_vec->flags |= VEC_SIZE_4;
+ to_vec->count = from_vec->count;
+}
+
+/* Per-source-size dispatch tables (1- to 4-component inputs).  Each has
+ * seven slots, one for every MATRIX_* index assigned in
+ * init_c_transformations below.
+ * NOTE(review): the literal 7 assumes those MATRIX_* values all lie in
+ * 0..6 -- confirm against their definition.
+ */
+static transform_func _XFORMAPI TAG(transform_tab_1)[7];
+static transform_func _XFORMAPI TAG(transform_tab_2)[7];
+static transform_func _XFORMAPI TAG(transform_tab_3)[7];
+static transform_func _XFORMAPI TAG(transform_tab_4)[7];
+
+/* Similar functions could be called several times, with more highly
+ * optimized routines overwriting the arrays. This only occurs during
+ * startup.
+ *
+ * Installs this instantiation's four per-size tables into
+ * gl_transform_tab[IDX][1..4] and fills each table with the C routine
+ * for every MATRIX_* type.  TAG and IDX are supplied from outside this
+ * template.
+ *
+ * NOTE(review): only TAG_TAB is #undef'd at the end; TAG_TAB_1..TAG_TAB_4
+ * stay defined after this function.  A second inclusion redefines them
+ * with the same replacement text -- confirm nothing depends on them
+ * remaining visible before adding matching #undefs.
+ */
+static void _XFORMAPI TAG(init_c_transformations)( void )
+{
+#define TAG_TAB gl_transform_tab[IDX]
+#define TAG_TAB_1 TAG(transform_tab_1)
+#define TAG_TAB_2 TAG(transform_tab_2)
+#define TAG_TAB_3 TAG(transform_tab_3)
+#define TAG_TAB_4 TAG(transform_tab_4)
+
+ /* Second-level lookup: source vector size -> per-size table. */
+ TAG_TAB[1] = TAG_TAB_1;
+ TAG_TAB[2] = TAG_TAB_2;
+ TAG_TAB[3] = TAG_TAB_3;
+ TAG_TAB[4] = TAG_TAB_4;
+
+ /* 1-D points (ie texcoords) */
+ TAG_TAB_1[MATRIX_GENERAL] = TAG(transform_points1_general);
+ TAG_TAB_1[MATRIX_IDENTITY] = TAG(transform_points1_identity);
+ TAG_TAB_1[MATRIX_3D_NO_ROT] = TAG(transform_points1_3d_no_rot);
+ TAG_TAB_1[MATRIX_PERSPECTIVE] = TAG(transform_points1_perspective) ;
+ TAG_TAB_1[MATRIX_2D] = TAG(transform_points1_2d);
+ TAG_TAB_1[MATRIX_2D_NO_ROT] = TAG(transform_points1_2d_no_rot);
+ TAG_TAB_1[MATRIX_3D] = TAG(transform_points1_3d);
+
+ /* 2-D points */
+ TAG_TAB_2[MATRIX_GENERAL] = TAG(transform_points2_general);
+ TAG_TAB_2[MATRIX_IDENTITY] = TAG(transform_points2_identity);
+ TAG_TAB_2[MATRIX_3D_NO_ROT] = TAG(transform_points2_3d_no_rot);
+ TAG_TAB_2[MATRIX_PERSPECTIVE] = TAG(transform_points2_perspective) ;
+ TAG_TAB_2[MATRIX_2D] = TAG(transform_points2_2d);
+ TAG_TAB_2[MATRIX_2D_NO_ROT] = TAG(transform_points2_2d_no_rot);
+ TAG_TAB_2[MATRIX_3D] = TAG(transform_points2_3d);
+
+ /* 3-D points */
+ TAG_TAB_3[MATRIX_GENERAL] = TAG(transform_points3_general);
+ TAG_TAB_3[MATRIX_IDENTITY] = TAG(transform_points3_identity);
+ TAG_TAB_3[MATRIX_3D_NO_ROT] = TAG(transform_points3_3d_no_rot);
+ TAG_TAB_3[MATRIX_PERSPECTIVE] = TAG(transform_points3_perspective);
+ TAG_TAB_3[MATRIX_2D] = TAG(transform_points3_2d);
+ TAG_TAB_3[MATRIX_2D_NO_ROT] = TAG(transform_points3_2d_no_rot);
+ TAG_TAB_3[MATRIX_3D] = TAG(transform_points3_3d);
+
+ /* 4-D points */
+ TAG_TAB_4[MATRIX_GENERAL] = TAG(transform_points4_general);
+ TAG_TAB_4[MATRIX_IDENTITY] = TAG(transform_points4_identity);
+ TAG_TAB_4[MATRIX_3D_NO_ROT] = TAG(transform_points4_3d_no_rot);
+ TAG_TAB_4[MATRIX_PERSPECTIVE] = TAG(transform_points4_perspective);
+ TAG_TAB_4[MATRIX_2D] = TAG(transform_points4_2d);
+ TAG_TAB_4[MATRIX_2D_NO_ROT] = TAG(transform_points4_2d_no_rot);
+ TAG_TAB_4[MATRIX_3D] = TAG(transform_points4_3d);
+
+#undef TAG_TAB
+}