summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mesa/tnl/t_vtx_api.c675
-rw-r--r--src/mesa/tnl/t_vtx_api.h234
-rw-r--r--src/mesa/tnl/t_vtx_exec.c636
-rw-r--r--src/mesa/tnl/t_vtx_sse.c93
-rw-r--r--src/mesa/tnl/t_vtx_x86.c727
5 files changed, 2365 insertions, 0 deletions
diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c
new file mode 100644
index 0000000000..8ae0569584
--- /dev/null
+++ b/src/mesa/tnl/t_vtx_api.c
@@ -0,0 +1,675 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "mtypes.h"
+#include "colormac.h"
+#include "simple_list.h"
+#include "vtxfmt.h"
+
+#include "tnl_vtx_api.h"
+
+/* Fallback versions of all the entrypoints for situations where
+ * codegen isn't available. This is slowed significantly by all the
+ * gumph necessary to get to the tnl pointer.
+ */
+
+
+/* MultiTexcoord ends up with both of these branches, unfortunately
+ * (it may get its own version of the macro after size-tracking is
+ * working).
+ *
+ * ATTRF stores up to N float components (A..D) for attribute ATTR;
+ * only the low four bits of ATTR are examined.
+ *
+ * When those bits are zero the components are written to the first N
+ * slots at tnl->dmaptr, the remaining slots up to tnl->vertex_size
+ * are copied from tnl->vertex[], dmaptr is advanced by vertex_size
+ * and tnl->notify() is called once tnl->counter has been decremented
+ * to zero.
+ *
+ * Otherwise the components are written through
+ * tnl->attrptr[ATTR & 0xf] and nothing else happens.
+ *
+ * The copy loop assigns the .i member directly: it is a plain integer
+ * slot, not a pointer, so it must not be dereferenced.
+ */
+#define ATTRF( ATTR, N, A, B, C, D )				\
+{								\
+   GET_CURRENT_CONTEXT( ctx );					\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+								\
+   if (((ATTR) & 0xf) == 0) {					\
+      int i;							\
+								\
+      if ((N)>0) tnl->dmaptr[0].f = (A);			\
+      if ((N)>1) tnl->dmaptr[1].f = (B);			\
+      if ((N)>2) tnl->dmaptr[2].f = (C);			\
+      if ((N)>3) tnl->dmaptr[3].f = (D);			\
+								\
+      for (i = (N); i < tnl->vertex_size; i++)			\
+	 tnl->dmaptr[i].i = tnl->vertex[i].i;			\
+								\
+      tnl->dmaptr += tnl->vertex_size;				\
+								\
+      if (--tnl->counter == 0)					\
+	 tnl->notify();						\
+   }								\
+   else {							\
+      GLfloat *dest = tnl->attrptr[(ATTR) & 0xf];		\
+      if ((N)>0) dest[0] = (A);					\
+      if ((N)>1) dest[1] = (B);					\
+      if ((N)>2) dest[2] = (C);					\
+      if ((N)>3) dest[3] = (D);					\
+   }								\
+}
+
+/* Fixed-size front ends for ATTRF.  Each one takes exactly the
+ * components its callers supply; the trailing ATTRF arguments are
+ * padded with 0, 0, 1.  ATTRF only stores the first N components, so
+ * the padding is never written.
+ */
+#define ATTR4F( ATTR, A, B, C, D )  ATTRF( ATTR, 4, A, B, C, D )
+#define ATTR3F( ATTR, A, B, C )     ATTRF( ATTR, 3, A, B, C, 1 )
+#define ATTR2F( ATTR, A, B )        ATTRF( ATTR, 2, A, B, 0, 1 )
+#define ATTR1F( ATTR, A )           ATTRF( ATTR, 1, A, 0, 0, 1 )
+
+/* Unsigned-byte front ends: every component is converted with
+ * UBYTE_TO_FLOAT and the result is handed to the float macro of the
+ * same size.
+ */
+#define ATTR3UB( ATTR, A, B, C )					\
+   ATTR3F( ATTR, UBYTE_TO_FLOAT(A), UBYTE_TO_FLOAT(B), UBYTE_TO_FLOAT(C))
+
+#define ATTR4UB( ATTR, A, B, C, D )				\
+   ATTR4F( ATTR, UBYTE_TO_FLOAT(A), UBYTE_TO_FLOAT(B),		\
+	   UBYTE_TO_FLOAT(C), UBYTE_TO_FLOAT(D))
+
+
+/* Vertex
+ */
+/* Two-component position: forwards (x, y) to ATTR2F for
+ * VERT_ATTRIB_POS.
+ */
+static void tnl_Vertex2f(GLfloat x, GLfloat y)
+{
+   ATTR2F(VERT_ATTRIB_POS, x, y);
+}
+
+/* Vector form of tnl_Vertex2f; reads v[0] and v[1]. */
+static void tnl_Vertex2fv(const GLfloat *v)
+{
+   ATTR2F(VERT_ATTRIB_POS, v[0], v[1]);
+}
+
+/* Three-component position: forwards (x, y, z) to ATTR3F for
+ * VERT_ATTRIB_POS.
+ */
+static void tnl_Vertex3f(GLfloat x, GLfloat y, GLfloat z)
+{
+   ATTR3F(VERT_ATTRIB_POS, x, y, z);
+}
+
+/* Vector form of tnl_Vertex3f; reads v[0..2]. */
+static void tnl_Vertex3fv(const GLfloat *v)
+{
+   ATTR3F(VERT_ATTRIB_POS, v[0], v[1], v[2]);
+}
+
+/* Four-component position: forwards (x, y, z, w) to ATTR4F for
+ * VERT_ATTRIB_POS.
+ */
+static void tnl_Vertex4f(GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+   ATTR4F(VERT_ATTRIB_POS, x, y, z, w);
+}
+
+/* Vector form of tnl_Vertex4f; reads v[0..3]. */
+static void tnl_Vertex4fv(const GLfloat *v)
+{
+   ATTR4F(VERT_ATTRIB_POS, v[0], v[1], v[2], v[3]);
+}
+
+
+/* Color
+ */
+/* RGB color from unsigned bytes; converted to float by ATTR3UB and
+ * stored for VERT_ATTRIB_COLOR0.
+ */
+static void tnl_Color3ub(GLubyte r, GLubyte g, GLubyte b)
+{
+   ATTR3UB(VERT_ATTRIB_COLOR0, r, g, b);
+}
+
+/* Vector form of tnl_Color3ub; reads v[0..2]. */
+static void tnl_Color3ubv(const GLubyte *v)
+{
+   ATTR3UB(VERT_ATTRIB_COLOR0, v[0], v[1], v[2]);
+}
+
+/* RGBA color from unsigned bytes; converted to float by ATTR4UB and
+ * stored for VERT_ATTRIB_COLOR0.
+ */
+static void tnl_Color4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a)
+{
+   ATTR4UB(VERT_ATTRIB_COLOR0, r, g, b, a);
+}
+
+/* Vector form of tnl_Color4ub; reads v[0..3]. */
+static void tnl_Color4ubv(const GLubyte *v)
+{
+   ATTR4UB(VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3]);
+}
+
+/* RGB color from floats, stored for VERT_ATTRIB_COLOR0. */
+static void tnl_Color3f(GLfloat r, GLfloat g, GLfloat b)
+{
+   ATTR3F(VERT_ATTRIB_COLOR0, r, g, b);
+}
+
+/* Vector form of tnl_Color3f; reads v[0..2]. */
+static void tnl_Color3fv(const GLfloat *v)
+{
+   ATTR3F(VERT_ATTRIB_COLOR0, v[0], v[1], v[2]);
+}
+
+/* RGBA color from floats, stored for VERT_ATTRIB_COLOR0. */
+static void tnl_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a)
+{
+   ATTR4F(VERT_ATTRIB_COLOR0, r, g, b, a);
+}
+
+/* Vector form of tnl_Color4f; reads v[0..3]. */
+static void tnl_Color4fv(const GLfloat *v)
+{
+   ATTR4F(VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3]);
+}
+
+
+/* Secondary Color
+ */
+/* Secondary RGB color from unsigned bytes; converted to float by
+ * ATTR3UB and stored for VERT_ATTRIB_COLOR1.
+ */
+static void tnl_SecondaryColor3ubEXT(GLubyte r, GLubyte g, GLubyte b)
+{
+   ATTR3UB(VERT_ATTRIB_COLOR1, r, g, b);
+}
+
+/* Vector form of tnl_SecondaryColor3ubEXT; reads v[0..2]. */
+static void tnl_SecondaryColor3ubvEXT(const GLubyte *v)
+{
+   ATTR3UB(VERT_ATTRIB_COLOR1, v[0], v[1], v[2]);
+}
+
+/* Secondary RGB color from floats, stored for VERT_ATTRIB_COLOR1. */
+static void tnl_SecondaryColor3fEXT(GLfloat r, GLfloat g, GLfloat b)
+{
+   ATTR3F(VERT_ATTRIB_COLOR1, r, g, b);
+}
+
+/* Vector form of tnl_SecondaryColor3fEXT; reads v[0..2]. */
+static void tnl_SecondaryColor3fvEXT(const GLfloat *v)
+{
+   ATTR3F(VERT_ATTRIB_COLOR1, v[0], v[1], v[2]);
+}
+
+
+
+/* Fog Coord
+ */
+/* Single fog coordinate, stored for VERT_ATTRIB_FOG via ATTR1F. */
+static void tnl_FogCoordfEXT(GLfloat f)
+{
+   ATTR1F(VERT_ATTRIB_FOG, f);
+}
+
+/* Vector form of tnl_FogCoordfEXT; reads v[0] only. */
+static void tnl_FogCoordfvEXT(const GLfloat *v)
+{
+   ATTR1F(VERT_ATTRIB_FOG, v[0]);
+}
+
+
+
+/* Normal
+ */
+/* Three-component normal, stored for VERT_ATTRIB_NORMAL via ATTR3F. */
+static void tnl_Normal3f(GLfloat n0, GLfloat n1, GLfloat n2)
+{
+   ATTR3F(VERT_ATTRIB_NORMAL, n0, n1, n2);
+}
+
+/* Vector form of tnl_Normal3f; reads v[0..2].  Writes to
+ * VERT_ATTRIB_NORMAL, the same attribute as the scalar form, rather
+ * than to the secondary color slot.
+ */
+static void tnl_Normal3fv( const GLfloat *v )
+{
+   ATTR3F( VERT_ATTRIB_NORMAL, v[0], v[1], v[2] );
+}
+
+
+/* TexCoord
+ */
+/* One-component texture coordinate for VERT_ATTRIB_TEX0. */
+static void tnl_TexCoord1f(GLfloat s)
+{
+   ATTR1F(VERT_ATTRIB_TEX0, s);
+}
+
+/* Vector form of tnl_TexCoord1f; reads v[0]. */
+static void tnl_TexCoord1fv(const GLfloat *v)
+{
+   ATTR1F(VERT_ATTRIB_TEX0, v[0]);
+}
+
+/* Two-component texture coordinate for VERT_ATTRIB_TEX0. */
+static void tnl_TexCoord2f(GLfloat s, GLfloat t)
+{
+   ATTR2F(VERT_ATTRIB_TEX0, s, t);
+}
+
+/* Vector form of tnl_TexCoord2f; reads v[0..1]. */
+static void tnl_TexCoord2fv(const GLfloat *v)
+{
+   ATTR2F(VERT_ATTRIB_TEX0, v[0], v[1]);
+}
+
+/* Three-component texture coordinate for VERT_ATTRIB_TEX0. */
+static void tnl_TexCoord3f(GLfloat s, GLfloat t, GLfloat r)
+{
+   ATTR3F(VERT_ATTRIB_TEX0, s, t, r);
+}
+
+/* Vector form of tnl_TexCoord3f; reads v[0..2]. */
+static void tnl_TexCoord3fv(const GLfloat *v)
+{
+   ATTR3F(VERT_ATTRIB_TEX0, v[0], v[1], v[2]);
+}
+
+/* Four-component texture coordinate for VERT_ATTRIB_TEX0. */
+static void tnl_TexCoord4f(GLfloat s, GLfloat t, GLfloat r, GLfloat q)
+{
+   ATTR4F(VERT_ATTRIB_TEX0, s, t, r, q);
+}
+
+/* Vector form of tnl_TexCoord4f; reads v[0..3]. */
+static void tnl_TexCoord4fv(const GLfloat *v)
+{
+   ATTR4F(VERT_ATTRIB_TEX0, v[0], v[1], v[2], v[3]);
+}
+
+
+/* MultiTexcoord
+ */
+/* The MultiTexCoord entry points all turn the texture-unit enum into
+ * an attribute number by offsetting it from GL_TEXTURE0_ARB onto
+ * VERT_ATTRIB_TEX0.  No range check is made on target here; ATTRF
+ * only looks at the low four bits of the resulting number.
+ */
+static void tnl_MultiTexCoord1fARB(GLenum target, GLfloat s)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR1F(slot, s);
+}
+
+/* Vector form of tnl_MultiTexCoord1fARB; reads v[0]. */
+static void tnl_MultiTexCoord1fvARB(GLenum target, const GLfloat *v)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR1F(slot, v[0]);
+}
+
+/* Two-component coordinate for the texture unit named by target. */
+static void tnl_MultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR2F(slot, s, t);
+}
+
+/* Vector form of tnl_MultiTexCoord2fARB; reads v[0..1]. */
+static void tnl_MultiTexCoord2fvARB(GLenum target, const GLfloat *v)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR2F(slot, v[0], v[1]);
+}
+
+/* Three-component coordinate for the texture unit named by target. */
+static void tnl_MultiTexCoord3fARB(GLenum target, GLfloat s, GLfloat t,
+				   GLfloat r)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR3F(slot, s, t, r);
+}
+
+/* Vector form of tnl_MultiTexCoord3fARB; reads v[0..2]. */
+static void tnl_MultiTexCoord3fvARB(GLenum target, const GLfloat *v)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR3F(slot, v[0], v[1], v[2]);
+}
+
+/* Four-component coordinate for the texture unit named by target. */
+static void tnl_MultiTexCoord4fARB(GLenum target, GLfloat s, GLfloat t,
+				   GLfloat r, GLfloat q)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR4F(slot, s, t, r, q);
+}
+
+/* Vector form of tnl_MultiTexCoord4fARB; reads v[0..3]. */
+static void tnl_MultiTexCoord4fvARB(GLenum target, const GLfloat *v)
+{
+   GLint slot = VERT_ATTRIB_TEX0 + (target - GL_TEXTURE0_ARB);
+   ATTR4F(slot, v[0], v[1], v[2], v[3]);
+}
+
+
+/* NV_vertex_program:
+ *
+ * *** Need second dispatch layer above this for size tracking. One
+ * *** dispatch layer handles both VertexAttribute and MultiTexCoord
+ */
+/* The VertexAttrib*NV entry points pass the caller's index straight
+ * to the ATTRnF macros; ATTRF uses only its low four bits.
+ */
+static void tnl_VertexAttrib1fNV(GLuint index, GLfloat s)
+{
+   ATTR1F(index, s);
+}
+
+/* Vector form of tnl_VertexAttrib1fNV; reads v[0]. */
+static void tnl_VertexAttrib1fvNV(GLuint index, const GLfloat *v)
+{
+   ATTR1F(index, v[0]);
+}
+
+/* Two-component generic attribute. */
+static void tnl_VertexAttrib2fNV(GLuint index, GLfloat s, GLfloat t)
+{
+   ATTR2F(index, s, t);
+}
+
+/* Vector form of tnl_VertexAttrib2fNV; reads v[0..1]. */
+static void tnl_VertexAttrib2fvNV(GLuint index, const GLfloat *v)
+{
+   ATTR2F(index, v[0], v[1]);
+}
+
+/* Three-component generic attribute. */
+static void tnl_VertexAttrib3fNV(GLuint index, GLfloat s, GLfloat t,
+				 GLfloat r)
+{
+   ATTR3F(index, s, t, r);
+}
+
+/* Vector form of tnl_VertexAttrib3fNV; reads v[0..2]. */
+static void tnl_VertexAttrib3fvNV(GLuint index, const GLfloat *v)
+{
+   ATTR3F(index, v[0], v[1], v[2]);
+}
+
+/* Four-component generic attribute. */
+static void tnl_VertexAttrib4fNV(GLuint index, GLfloat s, GLfloat t,
+				 GLfloat r, GLfloat q)
+{
+   ATTR4F(index, s, t, r, q);
+}
+
+/* Vector form of tnl_VertexAttrib4fNV; reads v[0..3]. */
+static void tnl_VertexAttrib4fvNV(GLuint index, const GLfloat *v)
+{
+   ATTR4F(index, v[0], v[1], v[2], v[3]);
+}
+
+
+/* Miscellaneous: (These don't alias NV attributes, right?)
+ */
+/* Store the edge flag in the slot tnl->edgeflagptr points at.
+ * GET_TNL is defined elsewhere; presumably it brings `tnl` into
+ * scope -- confirm.
+ */
+static void tnl_EdgeFlag(GLboolean flag)
+{
+   GET_TNL;
+   *tnl->edgeflagptr = flag;
+}
+
+/* Pointer form of tnl_EdgeFlag; reads a single GLboolean. */
+static void tnl_EdgeFlagv(const GLboolean *flag)
+{
+   GET_TNL;
+   *tnl->edgeflagptr = flag[0];
+}
+
+/* Store the color index in the slot tnl->indexptr points at. */
+static void tnl_Indexi(GLint idx)
+{
+   GET_TNL;
+   *tnl->indexptr = idx;
+}
+
+/* Pointer form of tnl_Indexi; reads a single GLint. */
+static void tnl_Indexiv(const GLint *idx)
+{
+   GET_TNL;
+   *tnl->indexptr = idx[0];
+}
+
+
+
+/* Could use dispatch switching to build 'ranges' of eval vertices for
+ * each type, avoiding need for flags. (Make
+ * evalcoords/evalpoints/vertices/attr0 mutually exclusive)
+ * --> In which case, may as well use Vertex{12}f{v} here.
+ */
+/* One-parameter evaluator coordinate; currently stored exactly like
+ * a one-component position (ATTR1F on VERT_ATTRIB_POS).
+ */
+static void _tnl_EvalCoord1f(GLfloat u)
+{
+   ATTR1F(VERT_ATTRIB_POS, u);
+}
+
+/* Vector form of _tnl_EvalCoord1f; reads v[0]. */
+static void _tnl_EvalCoord1fv(const GLfloat *v)
+{
+   ATTR1F(VERT_ATTRIB_POS, v[0]);
+}
+
+/* Two-parameter evaluator coordinate; currently stored exactly like
+ * a two-component position (ATTR2F on VERT_ATTRIB_POS).
+ */
+static void _tnl_EvalCoord2f(GLfloat u, GLfloat v)
+{
+   ATTR2F(VERT_ATTRIB_POS, u, v);
+}
+
+/* Vector form of _tnl_EvalCoord2f; reads v[0..1]. */
+static void _tnl_EvalCoord2fv(const GLfloat *v)
+{
+   ATTR2F(VERT_ATTRIB_POS, v[0], v[1]);
+}
+
+
+/* Materials:
+ * *** Treat as more vertex attributes
+ */
+/* glMaterialfv entry point.
+ *
+ * When tnl->prim[0] is anything other than GL_POLYGON+1 the vertex
+ * format path is abandoned through VFMT_FALLBACK and the call is
+ * re-issued through glMaterialfv; otherwise the call is passed to
+ * _mesa_noop_Materialfv.
+ *
+ * NOTE(review): GL_POLYGON+1 is presumably the "outside begin/end"
+ * marker -- confirm.
+ *
+ * The tnl pointer is fetched from the current context in the same way
+ * ATTRF does it; the function previously used `tnl` without declaring
+ * it.
+ */
+static void _tnl_Materialfv( GLenum face, GLenum pname,
+			     const GLfloat *params )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (tnl->prim[0] != GL_POLYGON+1) {
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );
+      return;
+   }
+   _mesa_noop_Materialfv( face, pname, params );
+}
+
+
+
+
+/* Codegen support
+ */
+/* Scan the list headed by l (walked with the foreach macro) and
+ * return the first entry whose key equals the requested one, or 0
+ * when the walk finishes without a match.
+ */
+static struct dynfn *lookup( struct dynfn *l, int key )
+{
+   struct dynfn *entry;
+
+   foreach( entry, l ) {
+      if (key == entry->key)
+	 return entry;
+   }
+
+   return 0;
+}
+
+/* Can't use the loopback template for this:
+ *
+ * CHOOSE(FN, ...) defines a static function choose_FN with parameter
+ * list ARGS1.  When called it:
+ *   - builds a key from tnl->vertex_format & (MASK|ACTIVE),
+ *   - searches tnl->dfn_cache.FN for that key with lookup(),
+ *   - on a miss asks tnl->codegen.FN to produce an entry,
+ *   - installs either the entry's code (cast to FNTYPE) or the
+ *     generic tnl_FN into tnl->context->Exec->FN,
+ *   - sets FLUSH_UPDATE_CURRENT in Driver.NeedFlush, and
+ *   - calls the newly installed function with ARGS2.
+ *
+ * NOTE(review): neither `tnl` nor `vb` is declared inside the
+ * expansion; presumably they are expected to come from the enclosing
+ * scope -- confirm where they are meant to be defined.
+ */
+#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 ) \
+static void choose_##FN ARGS1 \
+{ \
+ int key = tnl->vertex_format & (MASK|ACTIVE); \
+ struct dynfn *dfn = lookup( &tnl->dfn_cache.FN, key ); \
+ \
+ if (dfn == 0) \
+ dfn = tnl->codegen.FN( &vb, key ); \
+ else if (MESA_VERBOSE & DEBUG_CODEGEN) \
+ fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ ); \
+ \
+ if (dfn) \
+ tnl->context->Exec->FN = (FNTYPE)(dfn->code); \
+ else { \
+ if (MESA_VERBOSE & DEBUG_CODEGEN) \
+ fprintf(stderr, "%s -- generic version\n", __FUNCTION__ ); \
+ tnl->context->Exec->FN = tnl_##FN; \
+ } \
+ \
+ tnl->context->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \
+ tnl->context->Exec->FN ARGS2; \
+}
+
+
+
+/* Choosers for the normal and primary color entry points.  The third
+ * and fourth arguments are the MASK and ACTIVE values that CHOOSE
+ * combines into the cache key.
+ */
+CHOOSE(Normal3f, p3f, 3, VERT_ATTRIB_NORMAL,
+       (GLfloat a, GLfloat b, GLfloat c),
+       (a, b, c))
+CHOOSE(Normal3fv, pfv, 3, VERT_ATTRIB_NORMAL,
+       (const GLfloat *v),
+       (v))
+
+CHOOSE(Color4ub, p4ub, 4, VERT_ATTRIB_COLOR0,
+       (GLubyte a, GLubyte b, GLubyte c, GLubyte d),
+       (a, b, c, d))
+CHOOSE(Color4ubv, pubv, 4, VERT_ATTRIB_COLOR0,
+       (const GLubyte *v),
+       (v))
+CHOOSE(Color3ub, p3ub, 3, VERT_ATTRIB_COLOR0,
+       (GLubyte a, GLubyte b, GLubyte c),
+       (a, b, c))
+CHOOSE(Color3ubv, pubv, 3, VERT_ATTRIB_COLOR0,
+       (const GLubyte *v),
+       (v))
+
+CHOOSE(Color4f, p4f, 4, VERT_ATTRIB_COLOR0,
+       (GLfloat a, GLfloat b, GLfloat c, GLfloat d),
+       (a, b, c, d))
+CHOOSE(Color4fv, pfv, 4, VERT_ATTRIB_COLOR0,
+       (const GLfloat *v),
+       (v))
+CHOOSE(Color3f, p3f, 3, VERT_ATTRIB_COLOR0,
+       (GLfloat a, GLfloat b, GLfloat c),
+       (a, b, c))
+CHOOSE(Color3fv, pfv, 3, VERT_ATTRIB_COLOR0,
+       (const GLfloat *v),
+       (v))
+
+
+/* Choosers for secondary color, texture coordinate and position entry
+ * points.  CHOOSE takes six arguments (FN, FNTYPE, MASK, ACTIVE,
+ * ARGS1, ARGS2); the entries that were missing MASK now pass the
+ * component count, following the Normal/Color choosers in this file.
+ *
+ * NOTE(review): three MultiTexCoord entries still use MASK_ST_ALL /
+ * ACTIVE_ST_ALL, which are not defined anywhere in view -- confirm
+ * whether they should follow the (count, VERT_ATTRIB_TEX0) form too.
+ */
+CHOOSE(SecondaryColor3ubEXT, p3ub, 3, VERT_ATTRIB_COLOR1,
+       (GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE(SecondaryColor3ubvEXT, pubv, 3, VERT_ATTRIB_COLOR1,
+       (const GLubyte *v), (v))
+CHOOSE(SecondaryColor3fEXT, p3f, 3, VERT_ATTRIB_COLOR1,
+       (GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE(SecondaryColor3fvEXT, pfv, 3, VERT_ATTRIB_COLOR1,
+       (const GLfloat *v), (v))
+
+CHOOSE(TexCoord2f, p2f, 2, VERT_ATTRIB_TEX0,
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(TexCoord2fv, pfv, 2, VERT_ATTRIB_TEX0,
+       (const GLfloat *v), (v))
+CHOOSE(TexCoord1f, p1f, 1, VERT_ATTRIB_TEX0,
+       (GLfloat a), (a))
+CHOOSE(TexCoord1fv, pfv, 1, VERT_ATTRIB_TEX0,
+       (const GLfloat *v), (v))
+
+CHOOSE(MultiTexCoord2fARB, pe2f, 2, VERT_ATTRIB_TEX0,
+       (GLenum u,GLfloat a,GLfloat b), (u,a,b))
+CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+       (GLenum u,const GLfloat *v), (u,v))
+CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL,
+       (GLenum u,GLfloat a), (u,a))
+CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+       (GLenum u,const GLfloat *v), (u,v))
+
+CHOOSE(Vertex3f, p3f, 3, VERT_ATTRIB_POS,
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Vertex3fv, pfv, 3, VERT_ATTRIB_POS,
+       (const GLfloat *v), (v))
+CHOOSE(Vertex2f, p2f, 2, VERT_ATTRIB_POS,
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(Vertex2fv, pfv, 2, VERT_ATTRIB_POS,
+       (const GLfloat *v), (v))
+
+
+
+
+
+/* Fill in the entry points of *vfmt.
+ *
+ * Entry points that have a CHOOSE() instantiation in this file are
+ * pointed at their choose_* function.  Entry points with no chooser
+ * are pointed directly at the generic implementation defined in this
+ * file (tnl_* / _tnl_EvalCoord*), since no choose_* function exists
+ * for them.  Materialfv always goes to _tnl_Materialfv.
+ */
+void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt )
+{
+   /* Entry points with a chooser. */
+   vfmt->Color3f = choose_Color3f;
+   vfmt->Color3fv = choose_Color3fv;
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4f = choose_Color4f;
+   vfmt->Color4fv = choose_Color4fv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
+   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
+   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
+   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+   vfmt->Normal3f = choose_Normal3f;
+   vfmt->Normal3fv = choose_Normal3fv;
+   vfmt->TexCoord1f = choose_TexCoord1f;
+   vfmt->TexCoord1fv = choose_TexCoord1fv;
+   vfmt->TexCoord2f = choose_TexCoord2f;
+   vfmt->TexCoord2fv = choose_TexCoord2fv;
+   vfmt->Vertex2f = choose_Vertex2f;
+   vfmt->Vertex2fv = choose_Vertex2fv;
+   vfmt->Vertex3f = choose_Vertex3f;
+   vfmt->Vertex3fv = choose_Vertex3fv;
+
+   /* No chooser exists for these: use the generic versions. */
+   vfmt->TexCoord3f = tnl_TexCoord3f;
+   vfmt->TexCoord3fv = tnl_TexCoord3fv;
+   vfmt->TexCoord4f = tnl_TexCoord4f;
+   vfmt->TexCoord4fv = tnl_TexCoord4fv;
+   vfmt->MultiTexCoord3fARB = tnl_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = tnl_MultiTexCoord3fvARB;
+   vfmt->MultiTexCoord4fARB = tnl_MultiTexCoord4fARB;
+   vfmt->MultiTexCoord4fvARB = tnl_MultiTexCoord4fvARB;
+   vfmt->Vertex4f = tnl_Vertex4f;
+   vfmt->Vertex4fv = tnl_Vertex4fv;
+   vfmt->FogCoordfvEXT = tnl_FogCoordfvEXT;
+   vfmt->FogCoordfEXT = tnl_FogCoordfEXT;
+   vfmt->EdgeFlag = tnl_EdgeFlag;
+   vfmt->EdgeFlagv = tnl_EdgeFlagv;
+   vfmt->Indexi = tnl_Indexi;
+   vfmt->Indexiv = tnl_Indexiv;
+   vfmt->EvalCoord1f = _tnl_EvalCoord1f;
+   vfmt->EvalCoord1fv = _tnl_EvalCoord1fv;
+   vfmt->EvalCoord2f = _tnl_EvalCoord2f;
+   vfmt->EvalCoord2fv = _tnl_EvalCoord2fv;
+
+   /* NOTE(review): neither a chooser nor a generic version of the
+    * four entry points below is defined in this file -- confirm
+    * where they are meant to come from.
+    */
+   vfmt->EvalMesh1 = choose_EvalMesh1;
+   vfmt->EvalMesh2 = choose_EvalMesh2;
+   vfmt->EvalPoint1 = choose_EvalPoint1;
+   vfmt->EvalPoint2 = choose_EvalPoint2;
+
+   vfmt->Materialfv = _tnl_Materialfv;
+}
+
+
+/* Generator that never produces code: ignores both arguments and
+ * returns 0, which makes CHOOSE fall back to the generic tnl_*
+ * version.
+ */
+static struct dynfn *codegen_noop( struct _vb *vb, int key )
+{
+   (void) key;
+   (void) vb;
+
+   return 0;
+}
+
+/* Initialise every generator slot of *gen to codegen_noop, then,
+ * unless the MESA_NO_CODEGEN environment variable is set, let the
+ * architecture-specific initialisers that are compiled in
+ * (USE_X86_ASM, USE_SSE_ASM) replace entries.  The 3DNow and SPARC
+ * sections are empty placeholders.
+ *
+ * NOTE(review): the members assigned here (Vertex4f*, Attr*, NVAttr*,
+ * MTAttr*) are not among the members of struct dfn_generators as
+ * declared in t_vtx_api.h in this change, which lists Color*,
+ * SecondaryColor*, Normal*, TexCoord* and MultiTexCoord* instead --
+ * confirm which naming is intended.
+ */
+void _tnl_InitCodegen( struct dfn_generators *gen )
+{
+ gen->Vertex2f = codegen_noop;
+ gen->Vertex2fv = codegen_noop;
+ gen->Vertex3f = codegen_noop;
+ gen->Vertex3fv = codegen_noop;
+ gen->Vertex4f = codegen_noop;
+ gen->Vertex4fv = codegen_noop;
+
+ gen->Attr1f = codegen_noop;
+ gen->Attr1fv = codegen_noop;
+ gen->Attr2f = codegen_noop;
+ gen->Attr2fv = codegen_noop;
+ gen->Attr3f = codegen_noop;
+ gen->Attr3fv = codegen_noop;
+ gen->Attr4f = codegen_noop;
+ gen->Attr4fv = codegen_noop;
+ gen->Attr3ub = codegen_noop;
+ gen->Attr3ubv = codegen_noop;
+ gen->Attr4ub = codegen_noop;
+ gen->Attr4ubv = codegen_noop;
+
+ /* Probably need two versions of this, one for the front end
+ * (double dispatch), one for the back end (do the work) -- but
+ * will also need a second level of CHOOSE functions?
+ * -- Generate the dispatch layer using the existing templates somehow.
+ * -- Generate the backend and 2nd level choosers here.
+ * -- No need for a chooser on the top level.
+ * -- Can aliasing help -- ie can NVAttr1f == Attr1f/Vertex2f at this level (index is known)
+ */
+ gen->NVAttr1f = codegen_noop;
+ gen->NVAttr1fv = codegen_noop;
+ gen->NVAttr2f = codegen_noop;
+ gen->NVAttr2fv = codegen_noop;
+ gen->NVAttr3f = codegen_noop;
+ gen->NVAttr3fv = codegen_noop;
+ gen->NVAttr4f = codegen_noop;
+ gen->NVAttr4fv = codegen_noop;
+
+ gen->MTAttr1f = codegen_noop;
+ gen->MTAttr1fv = codegen_noop;
+ gen->MTAttr2f = codegen_noop;
+ gen->MTAttr2fv = codegen_noop;
+ gen->MTAttr3f = codegen_noop;
+ gen->MTAttr3fv = codegen_noop;
+ gen->MTAttr4f = codegen_noop;
+ gen->MTAttr4fv = codegen_noop;
+
+ /* Setting MESA_NO_CODEGEN (to any value) keeps the no-op generators. */
+ if (!getenv("MESA_NO_CODEGEN")) {
+#if defined(USE_X86_ASM)
+ _tnl_InitX86Codegen( gen );
+#endif
+
+#if defined(USE_SSE_ASM)
+ _tnl_InitSSECodegen( gen );
+#endif
+
+#if defined(USE_3DNOW_ASM)
+#endif
+
+#if defined(USE_SPARC_ASM)
+#endif
+ }
+}
diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h
new file mode 100644
index 0000000000..6bfdbe8fe3
--- /dev/null
+++ b/src/mesa/tnl/t_vtx_api.h
@@ -0,0 +1,234 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_VTXFMT_H__
+#define __RADEON_VTXFMT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "_tnl__context.h"
+
+extern void _tnl_UpdateVtxfmt( GLcontext *ctx );
+extern void _tnl_InitVtxfmt( GLcontext *ctx );
+extern void _tnl_InvalidateVtxfmt( GLcontext *ctx );
+extern void _tnl_DestroyVtxfmt( GLcontext *ctx );
+
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+/* Want to keep a cache of these around. Each is parameterized by
+ * only a single value which has only a small range. Only expect a
+ * few, so just rescan the list each time?
+ */
+/* One cached, dynamically generated function. */
+struct dynfn {
+ struct dynfn *next, *prev; /* list links; presumably used by the simple_list.h macros -- confirm */
+ int key; /* value lookup() compares against the requested key */
+ char *code; /* generated code; CHOOSE casts this to the entry point's function type */
+};
+
+/* One list head per entry point.  CHOOSE passes the address of the
+ * member named after the entry point to lookup() to find previously
+ * generated code.
+ */
+struct dfn_lists {
+ struct dynfn Vertex2f;
+ struct dynfn Vertex2fv;
+ struct dynfn Vertex3f;
+ struct dynfn Vertex3fv;
+ struct dynfn Color4ub;
+ struct dynfn Color4ubv;
+ struct dynfn Color3ub;
+ struct dynfn Color3ubv;
+ struct dynfn Color4f;
+ struct dynfn Color4fv;
+ struct dynfn Color3f;
+ struct dynfn Color3fv;
+ struct dynfn SecondaryColor3ubEXT;
+ struct dynfn SecondaryColor3ubvEXT;
+ struct dynfn SecondaryColor3fEXT;
+ struct dynfn SecondaryColor3fvEXT;
+ struct dynfn Normal3f;
+ struct dynfn Normal3fv;
+ struct dynfn TexCoord2f;
+ struct dynfn TexCoord2fv;
+ struct dynfn TexCoord1f;
+ struct dynfn TexCoord1fv;
+ struct dynfn MultiTexCoord2fARB;
+ struct dynfn MultiTexCoord2fvARB;
+ struct dynfn MultiTexCoord1fARB;
+ struct dynfn MultiTexCoord1fvARB;
+};
+
+struct _vb;
+
+/* One code generator per entry point.  Each takes a struct _vb
+ * pointer and a key and returns a struct dynfn pointer; CHOOSE calls
+ * the member named after the entry point when lookup() finds nothing,
+ * and treats a 0 return as "no generated code available".
+ */
+struct dfn_generators {
+ struct dynfn *(*Vertex2f)( struct _vb *, int );
+ struct dynfn *(*Vertex2fv)( struct _vb *, int );
+ struct dynfn *(*Vertex3f)( struct _vb *, int );
+ struct dynfn *(*Vertex3fv)( struct _vb *, int );
+ struct dynfn *(*Color4ub)( struct _vb *, int );
+ struct dynfn *(*Color4ubv)( struct _vb *, int );
+ struct dynfn *(*Color3ub)( struct _vb *, int );
+ struct dynfn *(*Color3ubv)( struct _vb *, int );
+ struct dynfn *(*Color4f)( struct _vb *, int );
+ struct dynfn *(*Color4fv)( struct _vb *, int );
+ struct dynfn *(*Color3f)( struct _vb *, int );
+ struct dynfn *(*Color3fv)( struct _vb *, int );
+ struct dynfn *(*SecondaryColor3ubEXT)( struct _vb *, int );
+ struct dynfn *(*SecondaryColor3ubvEXT)( struct _vb *, int );
+ struct dynfn *(*SecondaryColor3fEXT)( struct _vb *, int );
+ struct dynfn *(*SecondaryColor3fvEXT)( struct _vb *, int );
+ struct dynfn *(*Normal3f)( struct _vb *, int );
+ struct dynfn *(*Normal3fv)( struct _vb *, int );
+ struct dynfn *(*TexCoord2f)( struct _vb *, int );
+ struct dynfn *(*TexCoord2fv)( struct _vb *, int );
+ struct dynfn *(*TexCoord1f)( struct _vb *, int );
+ struct dynfn *(*TexCoord1fv)( struct _vb *, int );
+ struct dynfn *(*MultiTexCoord2fARB)( struct _vb *, int );
+ struct dynfn *(*MultiTexCoord2fvARB)( struct _vb *, int );
+ struct dynfn *(*MultiTexCoord1fARB)( struct _vb *, int );
+ struct dynfn *(*MultiTexCoord1fvARB)( struct _vb *, int );
+};
+
+/* Element type of tnl_vbinfo.primlist.
+ * NOTE(review): start/end presumably delimit a range of vertices and
+ * prim presumably holds the GL primitive type -- confirm against the
+ * code that fills primlist.
+ */
+struct prim {
+ GLuint start;
+ GLuint end;
+ GLuint prim;
+};
+
+#define _TNL__MAX_PRIMS 64
+
+
+
+/* Per-context state for the vertex-format entry points.
+ *
+ * primlist is sized with _TNL__MAX_PRIMS, the limit defined just above
+ * in this header (the RADEON_MAX_PRIMS name used before is not
+ * defined here).
+ *
+ * NOTE(review): ATTRF in t_vtx_api.c accesses dmaptr[i].f / .i, which
+ * does not match the GLint * declared here -- confirm the intended
+ * type of dmaptr.
+ */
+struct tnl_vbinfo {
+   /* Keep these first: referenced from codegen templates:
+    */
+   GLint counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   union { float f; int i; GLubyte ub4[4]; } vertex[16*4];
+
+   GLfloat *attrptr[16];
+   GLuint size[16];
+
+   GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+
+   GLboolean installed;
+   GLboolean recheck;
+
+   GLint vertex_size;
+   GLint initial_counter;
+   GLint nrverts;
+   GLuint vertex_format;
+
+   GLuint installed_vertex_format;
+
+   struct prim primlist[_TNL__MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
+};
+
+
+extern void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt );
+
+
+/* Patch an int-sized placeholder in a code buffer: the int found at
+ * CODE+OFFSET is asserted to equal CHECKVAL and is then overwritten
+ * with NEWVAL cast to int.
+ *
+ * NOTE(review): the arguments are pasted unparenthesized, so
+ * (int)NEWVAL casts only the first operand of a compound NEWVAL
+ * expression -- check call sites before changing this.
+ */
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL ) \
+do { \
+ int *icode = (int *)(CODE+OFFSET); \
+ assert (*icode == CHECKVAL); \
+ *icode = (int)NEWVAL; \
+} while (0)
+
+
+/* Useful for figuring out the offsets:
+ *
+ * Unlike FIXUP, this variant searches: OFFSET (which must be a
+ * modifiable variable) is incremented until the int at CODE+OFFSET
+ * equals CHECKVAL, the function name, line, CHECKVAL and the offset
+ * found are printed to stderr, the int is overwritten with NEWVAL,
+ * and OFFSET is advanced by 4.  The search has no upper bound.
+ */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL ) \
+do { \
+ while (*(int *)(CODE+OFFSET) != CHECKVAL) OFFSET++; \
+ fprintf(stderr, "%s/%d CVAL %x OFFSET %d\n", __FUNCTION__, \
+ __LINE__, CHECKVAL, OFFSET); \
+ *(int *)(CODE+OFFSET) = (int)NEWVAL; \
+ OFFSET += 4; \
+} while (0)
+
+/*
+ */
+void _tnl_InitCodegen( struct dfn_generators *gen );
+void _tnl_InitX86Codegen( struct dfn_generators *gen );
+void _tnl_InitSSECodegen( struct dfn_generators *gen );
+
+void _tnl_copy_to_current( GLcontext *ctx );
+
+
+/* Defined in tnl_vtxfmt_c.c.
+ */
+struct dynfn *tnl_makeX86Vertex2f( TNLcontext *, int );
+struct dynfn *tnl_makeX86Vertex2fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86Vertex3f( TNLcontext *, int );
+struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color4ub( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color4ubv( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color3ub( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color3ubv( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color4f( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color4fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color3f( TNLcontext *, int );
+struct dynfn *tnl_makeX86Color3fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86SecondaryColor3ubEXT( TNLcontext *, int );
+struct dynfn *tnl_makeX86SecondaryColor3ubvEXT( TNLcontext *, int );
+struct dynfn *tnl_makeX86SecondaryColor3fEXT( TNLcontext *, int );
+struct dynfn *tnl_makeX86SecondaryColor3fvEXT( TNLcontext *, int );
+struct dynfn *tnl_makeX86Normal3f( TNLcontext *, int );
+struct dynfn *tnl_makeX86Normal3fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *, int );
+struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86TexCoord1f( TNLcontext *, int );
+struct dynfn *tnl_makeX86TexCoord1fv( TNLcontext *, int );
+struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *, int );
+struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *, int );
+struct dynfn *tnl_makeX86MultiTexCoord1fARB( TNLcontext *, int );
+struct dynfn *tnl_makeX86MultiTexCoord1fvARB( TNLcontext *, int );
+
+
+#endif
+#endif
diff --git a/src/mesa/tnl/t_vtx_exec.c b/src/mesa/tnl/t_vtx_exec.c
new file mode 100644
index 0000000000..8470d6ab35
--- /dev/null
+++ b/src/mesa/tnl/t_vtx_exec.c
@@ -0,0 +1,636 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+#include "api_noop.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "mem.h"
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
+#include "vtxfmt.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
+
+static void _tnl_FlushVertices( GLcontext *, GLuint );
+
+
+/* Copy the attribute values of the vertex currently being built back
+ * into ctx->Current, then clear FLUSH_UPDATE_CURRENT in
+ * ctx->Driver.NeedFlush.
+ *
+ * Only attributes whose bit is set in tnl->vertex_format are copied.
+ * The caller must have FLUSH_UPDATE_CURRENT pending (asserted).
+ *
+ * NOTE(review): the header prototype and the call in
+ * _tnl_FlushVertices() spell this _tnl_copy_to_current -- confirm
+ * which name is intended.
+ * NOTE(review): 'IM' is not declared in this function; the material
+ * branch looks carried over from immediate-mode code -- confirm.
+ */
+void tnl_copy_to_current( GLcontext *ctx )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ GLuint flag = tnl->vertex_format;
+ GLint i;
+
+ assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+
+ /* Attribute slots 0..15, one bit each in the format mask. */
+ for (i = 0 ; i < 16 ; i++)
+ if (flag & (1<<i))
+ COPY_4FV( ctx->Current.Attrib[i], tnl->attribptr[i] );
+
+ if (flag & VERT_BIT_INDEX)
+ ctx->Current.Index = tnl->indexptr[0];
+
+ if (flag & VERT_BIT_EDGEFLAG)
+ ctx->Current.EdgeFlag = tnl->edgeflagptr[0];
+
+ if (flag & VERT_BIT_MATERIAL) {
+ _mesa_update_material( ctx,
+ IM->Material[IM->LastMaterial],
+ IM->MaterialOrMask );
+
+ tnl->Driver.NotifyMaterialChange( ctx );
+ }
+
+
+ ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
+
+/* Indexed by GL primitive mode (0 .. GL_POLYGON).  A nonzero entry
+ * marks a mode that optimize_prims() is allowed to merge when two
+ * consecutive runs of that mode have adjoining vertex ranges.
+ */
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {
+ 1, /* 0 points */
+ 1, /* 1 lines */
+ 0, /* 2 line_strip */
+ 0, /* 3 line_loop */
+ 1, /* 4 tris */
+ 0, /* 5 tri_fan */
+ 0, /* 6 tri_strip */
+ 1, /* 7 quads */
+ 0, /* 8 quadstrip */
+ 0, /* 9 poly */
+};
+
+/* Compact the primitive list in place: an entry is folded into the
+ * previous kept entry when both have the same mode (low four bits of
+ * .prim), that mode is flagged in discreet_gl_prim[], and the new
+ * entry starts exactly where the kept one ends.
+ *
+ * ONLY FOR EXECUTE ATM.
+ */
+static void optimize_prims( TNLcontext *tnl )
+{
+   int src;
+   int dst = 0;
+
+   if (tnl->nrprims <= 1)
+      return;
+
+   for (src = 1 ; src < tnl->nrprims ; src++) {
+      int dst_mode = tnl->primlist[dst].prim & 0xf;
+      int src_mode = tnl->primlist[src].prim & 0xf;
+
+      if (dst_mode == src_mode && discreet_gl_prim[dst_mode] &&
+          tnl->primlist[src].start == tnl->primlist[dst].end) {
+         /* Extend the kept entry over the merged range. */
+         tnl->primlist[dst].end = tnl->primlist[src].end;
+         continue;
+      }
+
+      /* Not mergeable: keep it, sliding it down if there is a gap. */
+      dst++;
+      if (dst != src)
+         tnl->primlist[dst] = tnl->primlist[src];
+   }
+
+   tnl->nrprims = dst + 1;
+}
+
+
+/* Bind vertex buffer pointers, run pipeline.
+ *
+ * Advances dma.current.start (and sets dma.current.ptr to match) past
+ * the vertices emitted so far, records the vertex format for the tcl
+ * stage, clears the pending dma.flush hook, runs the driver pipeline
+ * and empties the primitive list.
+ *
+ * NOTE(review): this body is not compilable as written -- 'tmp' is
+ * not declared here and the RunPipeline() argument list is a literal
+ * "..." placeholder.  'i' and 'j' are unused.
+ */
+static void flush_prims( TNLcontext *tnl )
+{
+ int i,j;
+
+ /* (initial_counter - counter) is the number of vertices emitted;
+ * presumably vertex_size is in 4-byte units -- TODO confirm.
+ */
+ tnl->dma.current.ptr = tnl->dma.current.start +=
+ (tnl->initial_counter - tnl->counter) * tnl->vertex_size * 4;
+
+ tnl->tcl.vertex_format = tnl->vertex_format;
+ tnl->tcl.aos_components[0] = &tmp;
+ tnl->tcl.nr_aos_components = 1;
+ tnl->dma.flush = 0;
+
+ tnl->Driver.RunPipeline( ... );
+
+ tnl->nrprims = 0;
+}
+
+
+/* Open a new entry at primlist[nrprims]: remember the mode and the
+ * index of its first vertex (the number of vertices emitted so far).
+ * nrprims itself is not advanced here.
+ */
+static void start_prim( TNLcontext *tnl, GLuint mode )
+{
+   if (MESA_VERBOSE & DEBUG_VFMT) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+              tnl->initial_counter - tnl->counter);
+   }
+
+   tnl->primlist[tnl->nrprims].prim = mode;
+   tnl->primlist[tnl->nrprims].start = tnl->initial_counter - tnl->counter;
+}
+
+/* Close the entry opened by start_prim(): OR 'flags' into its mode,
+ * record its end vertex and advance nrprims.  The list is flushed as
+ * soon as it reaches TNL_MAX_PRIMS entries.
+ */
+static void note_last_prim( TNLcontext *tnl, GLuint flags )
+{
+   if (MESA_VERBOSE & DEBUG_VFMT) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+              tnl->initial_counter - tnl->counter);
+   }
+
+   /* GL_POLYGON+1 is the "outside Begin/End" marker: nothing is open. */
+   if (tnl->prim[0] == GL_POLYGON+1)
+      return;
+
+   tnl->primlist[tnl->nrprims].end = tnl->initial_counter - tnl->counter;
+   tnl->primlist[tnl->nrprims].prim |= flags;
+
+   tnl->nrprims++;
+   if (tnl->nrprims == TNL_MAX_PRIMS)
+      flush_prims( tnl );
+}
+
+
+/* Copy vertex number 'n' of the primitive currently being recorded
+ * out of the dma buffer into 'dst' (tnl->vertex_size floats).
+ */
+static void copy_vertex( TNLcontext *tnl, GLuint n, GLfloat *dst )
+{
+   GLfloat *src = (GLfloat *)(tnl->dma.current.address +
+                              tnl->dma.current.ptr +
+                              (tnl->primlist[tnl->nrprims].start + n) *
+                              tnl->vertex_size * 4);
+   GLuint k = 0;
+
+   if (MESA_VERBOSE & DEBUG_VFMT) {
+      fprintf(stderr, "copy_vertex %d\n",
+              tnl->primlist[tnl->nrprims].start + n);
+   }
+
+   while (k < tnl->vertex_size) {
+      dst[k] = src[k];
+      k++;
+   }
+}
+
+/* Save the vertices of the current, unfinished primitive that must be
+ * re-emitted at the start of the next buffer so the primitive can
+ * continue across a buffer wrap.
+ *
+ *   tmp     - receives the saved vertices, one per row.  At most three
+ *             rows are written (quad strip with an odd vertex count).
+ *   returns - the number of rows written.
+ *
+ * NOTE: This actually reads the copied vertices back from uncached
+ * memory.  Could also use the counter/notify mechanism to populate
+ * tmp on the fly as vertices are generated.
+ */
+static GLuint copy_wrapped_verts( TNLcontext *tnl, GLfloat (*tmp)[15] )
+{
+   GLuint ovf, i;
+   /* Vertices emitted so far for the primitive being recorded. */
+   GLuint nr = (tnl->initial_counter - tnl->counter) - tnl->primlist[tnl->nrprims].start;
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr);
+
+   switch( tnl->prim[0] )
+   {
+   case GL_POINTS:
+      return 0;
+   case GL_LINES:
+      ovf = nr&1;
+      break;
+   case GL_TRIANGLES:
+      ovf = nr%3;
+      break;
+   case GL_QUADS:
+      ovf = nr&3;
+      break;
+   case GL_LINE_STRIP:
+      /* Only the last vertex is needed to continue the strip. */
+      ovf = nr ? 1 : 0;
+      break;
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      /* These need the first vertex as well as the most recent one. */
+      if (nr == 0)
+         return 0;
+      copy_vertex( tnl, 0, tmp[0] );
+      if (nr == 1)
+         return 1;
+      copy_vertex( tnl, nr-1, tmp[1] );
+      return 2;
+   case GL_TRIANGLE_STRIP:
+      /* Keep the last two vertices.  'nr' is unsigned, so the count
+       * must be clamped from nr itself: nr-1 wraps when nr == 0 and
+       * loses a vertex when nr is 1 or 2.
+       */
+      ovf = MIN2( nr, 2 );
+      break;
+   case GL_QUAD_STRIP:
+      /* Keep the last complete pair, plus the dangling vertex when
+       * the count is odd.  Same unsigned-wrap hazard as above.
+       */
+      if (nr < 2)
+         ovf = nr;
+      else
+         ovf = 2 + (nr&1);
+      break;
+   default:
+      assert(0);
+      return 0;
+   }
+
+   /* Common tail: save the final 'ovf' vertices in emission order. */
+   for (i = 0 ; i < ovf ; i++)
+      copy_vertex( tnl, nr-ovf+i, tmp[i] );
+   return i;
+}
+
+
+
+/* Buffer-full hook, installed as tnl->notify.
+ *
+ * Saves the vertices needed to continue the open primitive, closes
+ * and flushes what has been recorded, re-derives dmaptr/counter from
+ * the dma region, reopens the primitive and re-emits the saved
+ * vertices.
+ *
+ * The hook takes no arguments, so the context is looked up with
+ * GET_CURRENT_CONTEXT().
+ *
+ * TODO: Extend for vertex-format changes on wrap.
+ */
+static void wrap_buffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLfloat tmp[3][15];
+   GLuint i, nrverts;
+
+   if (MESA_VERBOSE & (DEBUG_VFMT|DEBUG_PRIMS))
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+              tnl->initial_counter - tnl->counter);
+
+   /* Don't deal with parity.  *** WONT WORK FOR COMPILE
+    *
+    * With an odd number of vertices in the open primitive, make room
+    * for one more vertex and wrap on the next notification instead.
+    */
+   if ((((tnl->initial_counter - tnl->counter) -
+         tnl->primlist[tnl->nrprims].start) & 1)) {
+      tnl->counter++;
+      tnl->initial_counter++;
+      return;
+   }
+
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_wrapped_verts( tnl, tmp );
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%d vertices to copy\n", nrverts);
+
+
+   /* Finish the prim at this point:
+    */
+   note_last_prim( tnl, 0 );
+   flush_prims( tnl );
+
+   /* Reset counter, dmaptr
+    */
+   tnl->dmaptr = (int *)(tnl->dma.current.ptr + tnl->dma.current.address);
+   tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) /
+      (tnl->vertex_size * 4);
+   tnl->counter--;
+   tnl->initial_counter = tnl->counter;
+   tnl->notify = wrap_buffer;
+
+   tnl->dma.flush = flush_prims;
+   start_prim( tnl, tnl->prim[0] );
+
+
+   /* Reemit saved vertices
+    * *** POSSIBLY IN NEW FORMAT
+    * --> Can't always extend at end of vertex?
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      if (MESA_VERBOSE & DEBUG_VERTS) {
+         int j;
+         fprintf(stderr, "re-emit vertex %d to %p\n", i, tnl->dmaptr);
+         if (MESA_VERBOSE & DEBUG_VERBOSE)
+            for (j = 0 ; j < tnl->vertex_size; j++)
+               fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]);
+      }
+
+      memcpy( tnl->dmaptr, tmp[i], tnl->vertex_size * 4 );
+      tnl->dmaptr += tnl->vertex_size;
+      tnl->counter--;
+   }
+}
+
+
+
+/* Always follow data, don't try to predict what's necessary.
+ *
+ * Flushes a pending FLUSH_UPDATE_CURRENT, resets vertex_format to
+ * VERT_BIT_POS, points each attribptr[] either into tnl->vertex (for
+ * attributes selected by 'ind') or at ctx->Current.Attrib[], and
+ * reinstalls the exec vtxfmt when the installed format differs.
+ *
+ * As written, always returns GL_TRUE.
+ *
+ * NOTE(review): 'i' and 'ind' are not declared in this function, and
+ * tnl->vertex_size is accumulated without being reset here -- confirm
+ * where they are meant to come from.
+ */
+static GLboolean check_vtx_fmt( GLcontext *ctx )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+ if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)
+ ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT );
+
+
+ TNL_NEWPRIM(tnl);
+ tnl->vertex_format = VERT_BIT_POS;
+ tnl->prim = &ctx->Driver.CurrentExecPrimitive;
+
+
+ /* Currently allow the full 4 components per attrib. Can use the
+ * mechanism from radeon driver color handling to reduce this (and
+ * also to store ubyte colors where these are incoming). This
+ * won't work for compile mode.
+ *
+ * Only adding components when they are first received eliminates
+ * the need for displaylist fixup, as there are no 'empty' slots
+ * at the start of buffers.
+ */
+ for (i = 0 ; i < 16 ; i++) {
+ if (ind & (1<<i)) {
+ /* Selected attribute: give it four slots in the vertex and seed
+ * them from the current value.
+ */
+ tnl->attribptr[i] = &tnl->vertex[tnl->vertex_size].f;
+ tnl->vertex_size += 4;
+ tnl->attribptr[i][0] = ctx->Current.Attrib[i][0];
+ tnl->attribptr[i][1] = ctx->Current.Attrib[i][1];
+ tnl->attribptr[i][2] = ctx->Current.Attrib[i][2];
+ tnl->attribptr[i][3] = ctx->Current.Attrib[i][3];
+ }
+ else
+ tnl->attribptr[i] = ctx->Current.Attrib[i];
+ }
+
+ /* Edgeflag, Index:
+ * (empty loop: not implemented yet)
+ */
+ for (i = 16 ; i < 18 ; i++)
+ ;
+
+ /* Materials:
+ * (empty loop: not implemented yet)
+ */
+ for (i = 18 ; i < 28 ; i++)
+ ;
+
+ /* Eval:
+ * (empty loop: not implemented yet)
+ */
+ for (i = 28 ; i < 29 ; i++)
+ ;
+
+
+ if (tnl->installed_vertex_format != tnl->vertex_format) {
+ if (MESA_VERBOSE & DEBUG_VFMT)
+ fprintf(stderr, "reinstall on vertex_format change\n");
+ _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt );
+ tnl->installed_vertex_format = tnl->vertex_format;
+ }
+
+ return GL_TRUE;
+}
+
+
+/* Mark the installed vertex format as stale: the next _tnl_Begin()
+ * sees 'recheck' set and revalidates.  Also clears 'fell_back'.
+ */
+void _tnl_InvalidateVtxfmt( GLcontext *ctx )
+{
+   /* Look the tnl context up the same way check_vtx_fmt() does. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   tnl->recheck = GL_TRUE;
+   tnl->fell_back = GL_FALSE;
+}
+
+
+
+
+/* Flush anything pending, clear 'recheck', then re-run
+ * check_vtx_fmt().  On success the tnl vtxfmt and FlushVertices hook
+ * are installed if they were not already.  On failure an installed
+ * vtxfmt is flushed and handed back through _tnl_wakeup_exec().
+ */
+static void _tnl_ValidateVtxfmt( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   tnl->recheck = GL_FALSE;
+
+   if (check_vtx_fmt( ctx )) {
+      if (!tnl->installed) {
+         if (MESA_VERBOSE & DEBUG_VFMT)
+            fprintf(stderr, "reinstall (new install)\n");
+
+         _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt );
+         ctx->Driver.FlushVertices = _tnl_FlushVertices;
+         tnl->installed = GL_TRUE;
+      }
+      /* Diagnostic only: guarded like every other debug print here. */
+      else if (MESA_VERBOSE & DEBUG_VFMT)
+         fprintf(stderr, "%s: already installed", __FUNCTION__);
+   }
+   else {
+      if (MESA_VERBOSE & DEBUG_VFMT)
+         fprintf(stderr, "%s: failed\n", __FUNCTION__);
+
+      if (tnl->installed) {
+         /* Push out buffered vertices before giving up the vtxfmt. */
+         if (tnl->dma.flush)
+            tnl->dma.flush( tnl );
+         _tnl_wakeup_exec( ctx );
+         tnl->installed = GL_FALSE;
+      }
+   }
+}
+
+
+
+
+
+/* Begin/End
+ */
+/* glBegin entry point.
+ *
+ * Raises GL_INVALID_ENUM for a mode above GL_POLYGON and
+ * GL_INVALID_OPERATION when a primitive is already open.  Otherwise
+ * updates derived state, revalidates the vertex format if flagged,
+ * makes sure there is dma space to write into, and opens a primitive
+ * of the requested mode.
+ *
+ * The entry point carries no context argument, so the context is
+ * looked up with GET_CURRENT_CONTEXT().
+ */
+static void _tnl_Begin( GLenum mode )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+
+   /* GL_POLYGON+1 is the "outside Begin/End" marker. */
+   if (tnl->prim[0] != GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
+   if (tnl->recheck)
+      _tnl_ValidateVtxfmt( ctx );
+
+   if (tnl->dma.flush && tnl->counter < 12) {
+      if (MESA_VERBOSE & DEBUG_VFMT)
+         fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__);
+      flush_prims( tnl );
+   }
+
+   /* No flush hook set: (re)initialise the write pointer and vertex
+    * countdown from the current dma region.
+    */
+   if (!tnl->dma.flush) {
+      if (tnl->dma.current.ptr + 12*tnl->vertex_size*4 >
+          tnl->dma.current.end) {
+         TNL_NEWPRIM( tnl );
+         _tnl_RefillCurrentDmaRegion( tnl );
+      }
+
+      tnl->dmaptr = (int *)(tnl->dma.current.address + tnl->dma.current.ptr);
+      tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) /
+         (tnl->vertex_size * 4);
+      tnl->counter--;
+      tnl->initial_counter = tnl->counter;
+      tnl->notify = wrap_buffer;
+      tnl->dma.flush = flush_prims;
+      ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   }
+
+
+   tnl->prim[0] = mode;
+   start_prim( tnl, mode | PRIM_BEGIN );
+}
+
+
+
+
+
+/* glEnd entry point.
+ *
+ * Raises GL_INVALID_OPERATION when no primitive is open; otherwise
+ * closes the open primitive with PRIM_END and restores the "outside
+ * Begin/End" marker (GL_POLYGON+1).
+ *
+ * The entry point carries no context argument, so the context is
+ * looked up with GET_CURRENT_CONTEXT().
+ */
+static void _tnl_End( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (tnl->prim[0] == GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+      return;
+   }
+
+   note_last_prim( tnl, PRIM_END );
+   tnl->prim[0] = GL_POLYGON+1;
+}
+
+
+/* ctx->Driver.FlushVertices hook, set by _tnl_ValidateVtxfmt().
+ *
+ *   FLUSH_UPDATE_CURRENT  - copy attributes back to ctx->Current and
+ *                           reinstall the exec vtxfmt.
+ *   FLUSH_STORED_VERTICES - run flush_prims() if it is the pending
+ *                           dma flush hook.
+ *
+ * Each handled flag is cleared from ctx->Driver.NeedFlush.
+ */
+static void _tnl_FlushVertices( GLcontext *ctx, GLuint flags )
+{
+   /* Declared at function scope: used by the assert and both branches. */
+   TNLcontext *tnl = TNL_CONTEXT( ctx );
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(tnl->installed);
+
+   if (flags & FLUSH_UPDATE_CURRENT) {
+      _tnl_copy_to_current( ctx );
+      if (MESA_VERBOSE & DEBUG_VFMT)
+         fprintf(stderr, "reinstall on update_current\n");
+      _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt );
+      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+   }
+
+   if (flags & FLUSH_STORED_VERTICES) {
+      assert (tnl->dma.flush == 0 ||
+              tnl->dma.flush == flush_prims);
+      if (tnl->dma.flush == flush_prims)
+         flush_prims( tnl );
+      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+   }
+}
+
+
+
+/* At this point, don't expect very many versions of each function to
+ * be generated, so not concerned about freeing them?
+ */
+
+
+/* One-time setup of the tnl vertex format: zero the GLvertexformat,
+ * hook in the chooser functions and the fixed entry points, empty
+ * every per-entry-point cache of generated functions, and initialise
+ * the code generators.
+ */
+static void _tnl_InitVtxfmt( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLvertexformat *vfmt = &(tnl->vtxfmt);
+
+   MEMSET( vfmt, 0, sizeof(GLvertexformat) );
+
+   /* Hook in chooser functions for codegen, etc:
+    */
+   _tnl_InitVtxfmtChoosers( vfmt );
+
+   /* Handled fully in supported states, but no codegen:
+    */
+   vfmt->ArrayElement = _ae_loopback_array_elt; /* generic helper */
+   vfmt->Rectf = _mesa_noop_Rectf; /* generic helper */
+   vfmt->Begin = _tnl_Begin;
+   vfmt->End = _tnl_End;
+
+   /* NOTE(review): 'context' and 'tnl' back-pointers are kept as
+    * written; confirm TNLcontext really carries both fields.
+    */
+   tnl->context = ctx;
+   tnl->tnl = TNL_CONTEXT(ctx);
+   tnl->prim = &ctx->Driver.CurrentExecPrimitive;
+   tnl->primflags = 0;
+
+   make_empty_list( &tnl->dfn_cache.Vertex2f );
+   make_empty_list( &tnl->dfn_cache.Vertex2fv );
+   make_empty_list( &tnl->dfn_cache.Vertex3f );
+   make_empty_list( &tnl->dfn_cache.Vertex3fv );
+   make_empty_list( &tnl->dfn_cache.Color4ub );
+   make_empty_list( &tnl->dfn_cache.Color4ubv );
+   make_empty_list( &tnl->dfn_cache.Color3ub );
+   make_empty_list( &tnl->dfn_cache.Color3ubv );
+   make_empty_list( &tnl->dfn_cache.Color4f );
+   make_empty_list( &tnl->dfn_cache.Color4fv );
+   make_empty_list( &tnl->dfn_cache.Color3f );
+   make_empty_list( &tnl->dfn_cache.Color3fv );
+   make_empty_list( &tnl->dfn_cache.SecondaryColor3fEXT );
+   make_empty_list( &tnl->dfn_cache.SecondaryColor3fvEXT );
+   make_empty_list( &tnl->dfn_cache.SecondaryColor3ubEXT );
+   make_empty_list( &tnl->dfn_cache.SecondaryColor3ubvEXT );
+   make_empty_list( &tnl->dfn_cache.Normal3f );
+   make_empty_list( &tnl->dfn_cache.Normal3fv );
+   make_empty_list( &tnl->dfn_cache.TexCoord2f );
+   make_empty_list( &tnl->dfn_cache.TexCoord2fv );
+   make_empty_list( &tnl->dfn_cache.TexCoord1f );
+   make_empty_list( &tnl->dfn_cache.TexCoord1fv );
+   make_empty_list( &tnl->dfn_cache.MultiTexCoord2fARB );
+   make_empty_list( &tnl->dfn_cache.MultiTexCoord2fvARB );
+   make_empty_list( &tnl->dfn_cache.MultiTexCoord1fARB );
+   make_empty_list( &tnl->dfn_cache.MultiTexCoord1fvARB );
+
+   _tnl_InitCodegen( &tnl->codegen );
+}
+
+/* Empty the list headed by 'l': each entry is unlinked, then its
+ * code buffer and the entry itself are released.
+ */
+static void free_funcs( struct dynfn *l )
+{
+   struct dynfn *fn, *next;
+
+   /* foreach_s takes a spare cursor so 'fn' can be freed in the body. */
+   foreach_s (fn, next, l) {
+      remove_from_list( fn );
+      ALIGN_FREE( fn->code );
+      FREE( fn );
+   }
+}
+
+
+/* Release every generated function held in the per-entry-point
+ * caches of this context.
+ *
+ * NOTE(review): count_funcs() is not defined in the visible part of
+ * this file -- confirm it exists or drop the call.
+ */
+static void _tnl_DestroyVtxfmt( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   count_funcs();
+   free_funcs( &tnl->dfn_cache.Vertex2f );
+   free_funcs( &tnl->dfn_cache.Vertex2fv );
+   free_funcs( &tnl->dfn_cache.Vertex3f );
+   free_funcs( &tnl->dfn_cache.Vertex3fv );
+   free_funcs( &tnl->dfn_cache.Color4ub );
+   free_funcs( &tnl->dfn_cache.Color4ubv );
+   free_funcs( &tnl->dfn_cache.Color3ub );
+   free_funcs( &tnl->dfn_cache.Color3ubv );
+   free_funcs( &tnl->dfn_cache.Color4f );
+   free_funcs( &tnl->dfn_cache.Color4fv );
+   free_funcs( &tnl->dfn_cache.Color3f );
+   free_funcs( &tnl->dfn_cache.Color3fv );
+   free_funcs( &tnl->dfn_cache.SecondaryColor3ubEXT );
+   free_funcs( &tnl->dfn_cache.SecondaryColor3ubvEXT );
+   free_funcs( &tnl->dfn_cache.SecondaryColor3fEXT );
+   free_funcs( &tnl->dfn_cache.SecondaryColor3fvEXT );
+   free_funcs( &tnl->dfn_cache.Normal3f );
+   free_funcs( &tnl->dfn_cache.Normal3fv );
+   free_funcs( &tnl->dfn_cache.TexCoord2f );
+   free_funcs( &tnl->dfn_cache.TexCoord2fv );
+   free_funcs( &tnl->dfn_cache.TexCoord1f );
+   free_funcs( &tnl->dfn_cache.TexCoord1fv );
+   free_funcs( &tnl->dfn_cache.MultiTexCoord2fARB );
+   free_funcs( &tnl->dfn_cache.MultiTexCoord2fvARB );
+   free_funcs( &tnl->dfn_cache.MultiTexCoord1fARB );
+   free_funcs( &tnl->dfn_cache.MultiTexCoord1fvARB );
+}
+
diff --git a/src/mesa/tnl/t_vtx_sse.c b/src/mesa/tnl/t_vtx_sse.c
new file mode 100644
index 0000000000..240d6cf8b9
--- /dev/null
+++ b/src/mesa/tnl/t_vtx_sse.c
@@ -0,0 +1,93 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h"
+#include "simple_list.h"
+#include "t_vtx_api.h"
+
+#if defined(USE_SSE_ASM)
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state. ???P4 SSE2 versions???
+ */
+
+
+/* Build an SSE2 Normal3fv for the current state: the template below
+ * is copied into a fresh 16-byte-aligned buffer and the immediate
+ * operand of the "mov $imm32,%edx" at byte offset 5 is replaced with
+ * vb->normalptr.  The new dynfn is linked at the head of the
+ * Normal3fv cache and returned.
+ */
+static struct dynfn *makeSSENormal3fv( struct _vb *vb, int key )
+{
+   /* Requires P4 (sse2?)
+    */
+   static unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,         /* mov 0x4(%esp,1),%eax */
+      0xba, 0x78, 0x56, 0x34, 0x12,   /* mov $0x12345678,%edx */
+      0xf3, 0x0f, 0x7e, 0x00,         /* movq (%eax),%xmm0 */
+      0x66, 0x0f, 0x6e, 0x48, 0x08,   /* movd 0x8(%eax),%xmm1 */
+      0x66, 0x0f, 0xd6, 0x42, 0x0c,   /* movq %xmm0,0xc(%edx) */
+      0x66, 0x0f, 0x7e, 0x4a, 0x14,   /* movd %xmm1,0x14(%edx) */
+      0xc3,                           /* ret */
+   };
+
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   insert_at_head( &vb->dfn_cache.Normal3fv, dfn );
+   dfn->key = key;
+
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   /* The third argument must match the placeholder in the template
+    * (0x12345678 at offset 5), as the x86 generators do.
+    */
+   FIXUP(dfn->code, 5, 0x12345678, (int)vb->normalptr);
+   return dfn;
+}
+
+/* SSE code generator setup.  No generators are registered yet; the
+ * references below only keep the compiler from warning about unused
+ * symbols.
+ */
+void _tnl_InitSSECodegen( struct dfn_generators *gen )
+{
+   /* Need to:
+    * - check kernel sse support
+    * - check p4/sse2
+    */
+   (void) gen;
+   (void) makeSSENormal3fv;
+}
+
+
+#else
+
+/* Stub used when USE_SSE_ASM is not defined: does nothing. */
+void _tnl_InitSSECodegen( struct dfn_generators *gen )
+{
+ (void) gen;
+}
+
+#endif
+
+
+
+
diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c
new file mode 100644
index 0000000000..4713a325bf
--- /dev/null
+++ b/src/mesa/tnl/t_vtx_x86.c
@@ -0,0 +1,727 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include "mem.h"
+#include "mmath.h"
+#include "simple_list.h"
+#include "tnl_vtxfmt.h"
+
+#if defined(USE_X86_ASM)
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state. Generic x86 versions.
+ */
+
+/* Build an x86 Vertex3f specialised for the current vertex size.
+ *
+ * A machine-code template is copied into a fresh 16-byte-aligned
+ * buffer and its zero placeholders are patched with the addresses of
+ * tnl->dmaptr, tnl->counter, tnl->notify and the trailing
+ * tnl->vertex[] slots (plus the remaining dword count in the generic
+ * case).  The new dynfn is linked at the head of the Vertex3f cache
+ * and returned.
+ *
+ * Vertex sizes 4 and 6 have unrolled templates; every other size uses
+ * a "repz movsl" copy of the slots after x, y, z.
+ */
+struct dynfn *tnl_makeX86Vertex3f( TNLcontext *tnl, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   /* Same debug mask as the other generators in this file. */
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   switch (tnl->vertex_size) {
+   case 4: {
+      static char temp[] = {
+         0x8b, 0x0d, 0,0,0,0,      /* mov DMAPTR,%ecx */
+         0x8b, 0x44, 0x24, 0x04,   /* mov 0x4(%esp,1),%eax */
+         0x8b, 0x54, 0x24, 0x08,   /* mov 0x8(%esp,1),%edx */
+         0x89, 0x01,               /* mov %eax,(%ecx) */
+         0x89, 0x51, 0x04,         /* mov %edx,0x4(%ecx) */
+         0x8b, 0x44, 0x24, 0x0c,   /* mov 0xc(%esp,1),%eax */
+         0x8b, 0x15, 0,0,0,0,      /* mov VERTEX[3],%edx */
+         0x89, 0x41, 0x08,         /* mov %eax,0x8(%ecx) */
+         0x89, 0x51, 0x0c,         /* mov %edx,0xc(%ecx) */
+         0xa1, 0, 0, 0, 0,         /* mov COUNTER,%eax */
+         0x83, 0xc1, 0x10,         /* add $0x10,%ecx */
+         0x48,                     /* dec %eax */
+         0x89, 0x0d, 0,0,0,0,      /* mov %ecx,DMAPTR */
+         0xa3, 0, 0, 0, 0,         /* mov %eax,COUNTER */
+         0x74, 0x01,               /* je +1 */
+         0xc3,                     /* ret */
+         0xff, 0x25, 0,0,0,0       /* jmp *NOTIFY */
+      };
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 2, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 25, 0x0, (int)&tnl->vertex[3]);
+      FIXUP(dfn->code, 36, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 46, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 51, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 60, 0x0, (int)&tnl->notify);
+      break;
+   }
+   case 6: {
+      static char temp[] = {
+         0x57,                     /* push %edi */
+         0x8b, 0x3d, 0, 0, 0, 0,   /* mov DMAPTR,%edi */
+         0x8b, 0x44, 0x24, 0x8,    /* mov 0x8(%esp,1),%eax */
+         0x8b, 0x54, 0x24, 0xc,    /* mov 0xc(%esp,1),%edx */
+         0x8b, 0x4c, 0x24, 0x10,   /* mov 0x10(%esp,1),%ecx */
+         0x89, 0x07,               /* mov %eax,(%edi) */
+         0x89, 0x57, 0x04,         /* mov %edx,0x4(%edi) */
+         0x89, 0x4f, 0x08,         /* mov %ecx,0x8(%edi) */
+         0xa1, 0, 0, 0, 0,         /* mov VERTEX[3],%eax */
+         0x8b, 0x15, 0, 0, 0, 0,   /* mov VERTEX[4],%edx */
+         0x8b, 0x0d, 0, 0, 0, 0,   /* mov VERTEX[5],%ecx */
+         0x89, 0x47, 0x0c,         /* mov %eax,0xc(%edi) */
+         0x89, 0x57, 0x10,         /* mov %edx,0x10(%edi) */
+         0x89, 0x4f, 0x14,         /* mov %ecx,0x14(%edi) */
+         0x83, 0xc7, 0x18,         /* add $0x18,%edi */
+         0xa1, 0, 0, 0, 0,         /* mov COUNTER,%eax */
+         0x89, 0x3d, 0, 0, 0, 0,   /* mov %edi,DMAPTR */
+         0x48,                     /* dec %eax */
+         0x5f,                     /* pop %edi */
+         0xa3, 0, 0, 0, 0,         /* mov %eax,COUNTER */
+         0x74, 0x01,               /* je +1 */
+         0xc3,                     /* ret */
+         0xff, 0x25, 0,0,0,0,      /* jmp *NOTIFY */
+      };
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 3, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 28, 0x0, (int)&tnl->vertex[3]);
+      FIXUP(dfn->code, 34, 0x0, (int)&tnl->vertex[4]);
+      FIXUP(dfn->code, 40, 0x0, (int)&tnl->vertex[5]);
+      FIXUP(dfn->code, 57, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 63, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 70, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 79, 0x0, (int)&tnl->notify);
+      break;
+   }
+   default: {
+      /* Repz convenient as it's possible to emit code for any size
+       * vertex with little tweaking.  Might as well read vertsize
+       * though, and have only one of these.
+       */
+      static char temp[] = {
+         0x57,                     /* push %edi */
+         0x56,                     /* push %esi */
+         0xbe, 0, 0, 0, 0,         /* mov $VERTEX+3,%esi */
+         0x8b, 0x3d, 0, 0, 0, 0,   /* mov DMAPTR,%edi */
+         0x8b, 0x44, 0x24, 0x0c,   /* mov 0x0c(%esp,1),%eax */
+         0x8b, 0x54, 0x24, 0x10,   /* mov 0x10(%esp,1),%edx */
+         0x8b, 0x4c, 0x24, 0x14,   /* mov 0x14(%esp,1),%ecx */
+         0x89, 0x07,               /* mov %eax,(%edi) */
+         0x89, 0x57, 0x04,         /* mov %edx,0x4(%edi) */
+         0x89, 0x4f, 0x08,         /* mov %ecx,0x8(%edi) */
+         0x83, 0xc7, 0x0c,         /* add $0xc,%edi */
+         0xb9, 0, 0, 0, 0,         /* mov $VERTSIZE-3,%ecx */
+         0xf3, 0xa5,               /* repz movsl %ds:(%esi),%es:(%edi)*/
+         0xa1, 0, 0, 0, 0,         /* mov COUNTER,%eax */
+         0x89, 0x3d, 0, 0, 0, 0,   /* mov %edi,DMAPTR */
+         0x48,                     /* dec %eax */
+         0xa3, 0, 0, 0, 0,         /* mov %eax,COUNTER */
+         0x5e,                     /* pop %esi */
+         0x5f,                     /* pop %edi */
+         0x74, 0x01,               /* je +1 */
+         0xc3,                     /* ret */
+         0xff, 0x25, 0, 0, 0, 0    /* jmp NOTIFY */
+      };
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 3, 0x0, (int)&tnl->vertex[3]);
+      FIXUP(dfn->code, 9, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 37, 0x0, tnl->vertex_size-3);
+      FIXUP(dfn->code, 44, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 50, 0x0, (int)&tnl->dmaptr);
+      FIXUP(dfn->code, 56, 0x0, (int)&tnl->counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify);
+      break;
+   }
+   }
+
+   insert_at_head( &tnl->dfn_cache.Vertex3f, dfn );
+   dfn->key = key;
+   return dfn;
+}
+
+
+
+/* Build an x86 Vertex3fv specialised for the current vertex size.
+ *
+ * A machine-code template is copied into a fresh 16-byte-aligned
+ * buffer and its placeholder operands are patched, via FIXUP, with the
+ * addresses of tnl->dmaptr, tnl->counter, tnl->notify and the trailing
+ * tnl->vertex[] slots (plus the remaining dword count in the generic
+ * case).  The new dynfn is linked at the head of the Vertex3fv cache
+ * and returned.
+ *
+ * Vertex sizes 6 and 8 have unrolled templates; every other size uses
+ * a "repz movsl" copy of the slots after x, y, z.
+ *
+ * In each FIXUP call the third argument equals the placeholder value
+ * found at that offset in the template; presumably FIXUP checks it
+ * before patching -- TODO confirm against the macro definition.
+ */
+struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *tnl, int key )
+{
+ struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+ if (TNL_DEBUG & DEBUG_CODEGEN)
+ fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+ switch (tnl->vertex_size) {
+ case 6: {
+ /* Placeholders 0x0/0x4/0x8 stand for DMAPTR/COUNTER/NOTIFY and
+ * 0x1c/0x20/0x24 for VERTEX[3..5]; see the FIXUPs below.
+ */
+ static char temp[] = {
+ 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */
+ 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */
+ 0x8b, 0x11, /* mov (%ecx),%edx */
+ 0x89, 0x10, /* mov %edx,(%eax) */
+ 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */
+ 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */
+ 0x89, 0x50, 0x04, /* mov %edx,0x4(%eax) */
+ 0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */
+ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */
+ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */
+ 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */
+ 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */
+ 0x8b, 0x15, 0x24, 0, 0, 0, /* mov 0x24,%edx */
+ 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */
+ 0x83, 0xc0, 0x18, /* add $0x18,%eax */
+ 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */
+ 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */
+ 0x48, /* dec %eax */
+ 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */
+ 0x74, 0x01, /* je 2a4 <.f11> */
+ 0xc3, /* ret */
+ 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */
+ };
+
+ dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+ memcpy (dfn->code, temp, sizeof(temp));
+ FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]);
+ FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]);
+ FIXUP(dfn->code, 45, 0x00000024, (int)&tnl->vertex[5]);
+ FIXUP(dfn->code, 56, 0x00000000, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 61, 0x00000004, (int)&tnl->counter);
+ FIXUP(dfn->code, 67, 0x00000004, (int)&tnl->counter);
+ FIXUP(dfn->code, 76, 0x00000008, (int)&tnl->notify);
+ break;
+ }
+
+
+ case 8: {
+ /* Same layout as the size-6 template with one more pair of
+ * loads/stores; the 0x1c/0x20 placeholders appear twice and are
+ * patched to VERTEX[3..4] and VERTEX[5..6] respectively.
+ */
+ static char temp[] = {
+ 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */
+ 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */
+ 0x8b, 0x11, /* mov (%ecx),%edx */
+ 0x89, 0x10, /* mov %edx,(%eax) */
+ 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */
+ 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */
+ 0x89, 0x50, 0x04, /* mov %edx,0x4(%eax) */
+ 0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */
+ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */
+ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */
+ 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */
+ 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */
+ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */
+ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */
+ 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */
+ 0x89, 0x48, 0x18, /* mov %ecx,0x18(%eax) */
+ 0x8b, 0x15, 0x24, 0, 0, 0, /* mov 0x24,%edx */
+ 0x89, 0x50, 0x1c, /* mov %edx,0x1c(%eax) */
+ 0x83, 0xc0, 0x20, /* add $0x20,%eax */
+ 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */
+ 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */
+ 0x48, /* dec %eax */
+ 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */
+ 0x74, 0x01, /* je 2a4 <.f11> */
+ 0xc3, /* ret */
+ 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */
+ };
+
+ dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+ memcpy (dfn->code, temp, sizeof(temp));
+ FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]);
+ FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]);
+ FIXUP(dfn->code, 45, 0x0000001c, (int)&tnl->vertex[5]);
+ FIXUP(dfn->code, 51, 0x00000020, (int)&tnl->vertex[6]);
+ FIXUP(dfn->code, 63, 0x00000024, (int)&tnl->vertex[7]);
+ FIXUP(dfn->code, 74, 0x00000000, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 79, 0x00000004, (int)&tnl->counter);
+ FIXUP(dfn->code, 85, 0x00000004, (int)&tnl->counter);
+ FIXUP(dfn->code, 94, 0x00000008, (int)&tnl->notify);
+ break;
+ }
+
+
+
+ default: {
+ /* Repz convenient as it's possible to emit code for any size
+ * vertex with little tweaking. Might as well read vertsize
+ * though, and have only one of these.
+ */
+ static char temp[] = {
+ 0x8b, 0x54, 0x24, 0x04, /* mov 0x4(%esp,1),%edx */
+ 0x57, /* push %edi */
+ 0x56, /* push %esi */
+ 0x8b, 0x3d, 1,1,1,1, /* mov DMAPTR,%edi */
+ 0x8b, 0x02, /* mov (%edx),%eax */
+ 0x8b, 0x4a, 0x04, /* mov 0x4(%edx),%ecx */
+ 0x8b, 0x72, 0x08, /* mov 0x8(%edx),%esi */
+ 0x89, 0x07, /* mov %eax,(%edi) */
+ 0x89, 0x4f, 0x04, /* mov %ecx,0x4(%edi) */
+ 0x89, 0x77, 0x08, /* mov %esi,0x8(%edi) */
+ 0x83, 0xc7, 0x0c, /* add $0xc,%edi */
+ 0xb9, 0x06, 0x00, 0x00, 0x00, /* mov $VERTSIZE-3,%ecx */
+ 0xbe, 0x58, 0x00, 0x00, 0x00, /* mov $VERTEX[3],%esi */
+ 0xf3, 0xa5, /* repz movsl %ds:(%esi),%es:(%edi)*/
+ 0x89, 0x3d, 1, 1, 1, 1, /* mov %edi,DMAPTR */
+ 0xa1, 2, 2, 2, 2, /* mov COUNTER,%eax */
+ 0x5e, /* pop %esi */
+ 0x5f, /* pop %edi */
+ 0x48, /* dec %eax */
+ 0xa3, 2, 2, 2, 2, /* mov %eax,COUNTER */
+ 0x74, 0x01, /* je +1 */
+ 0xc3, /* ret */
+ 0xff, 0x25, 0, 0, 0, 0 /* jmp NOTIFY */
+ };
+
+ dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+ memcpy (dfn->code, temp, sizeof(temp));
+ FIXUP(dfn->code, 8, 0x01010101, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 32, 0x00000006, tnl->vertex_size-3);
+ FIXUP(dfn->code, 37, 0x00000058, (int)&tnl->vertex[3]);
+ FIXUP(dfn->code, 45, 0x01010101, (int)&tnl->dmaptr);
+ FIXUP(dfn->code, 50, 0x02020202, (int)&tnl->counter);
+ FIXUP(dfn->code, 58, 0x02020202, (int)&tnl->counter);
+ FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify);
+ break;
+ }
+ }
+
+ insert_at_head( &tnl->dfn_cache.Vertex3fv, dfn );
+ dfn->key = key;
+ return dfn;
+}
+
+
+/* Build an x86 Normal3fv: three dwords are copied from the pointer
+ * argument to a fixed destination.  The destination immediate at byte
+ * offset 5 of the template is patched with tnl->normalptr.  The new
+ * dynfn is linked at the head of the Normal3fv cache and returned.
+ */
+struct dynfn *tnl_makeX86Normal3fv( TNLcontext *tnl, int key )
+{
+   static char insns[] = {
+      0x8b, 0x44, 0x24, 0x04,   /* mov 0x4(%esp,1),%eax */
+      0xba, 0, 0, 0, 0,         /* mov $DEST,%edx */
+      0x8b, 0x08,               /* mov (%eax),%ecx */
+      0x89, 0x0a,               /* mov %ecx,(%edx) */
+      0x8b, 0x48, 0x04,         /* mov 0x4(%eax),%ecx */
+      0x89, 0x4a, 0x04,         /* mov %ecx,0x4(%edx) */
+      0x8b, 0x48, 0x08,         /* mov 0x8(%eax),%ecx */
+      0x89, 0x4a, 0x08,         /* mov %ecx,0x8(%edx) */
+      0xc3,                     /* ret */
+   };
+
+   struct dynfn *fn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN) {
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   }
+
+   /* Register the entry first, then fill in its code. */
+   insert_at_head( &tnl->dfn_cache.Normal3fv, fn );
+   fn->key = key;
+
+   fn->code = ALIGN_MALLOC( sizeof(insns), 16 );
+   memcpy (fn->code, insns, sizeof(insns));
+   FIXUP(fn->code, 5, 0x0, (int)tnl->normalptr);
+
+   return fn;
+}
+
+/* Build an x86 Normal3f: the three stack arguments are stored to a
+ * fixed destination.  The destination immediate at byte offset 1 of
+ * the template (placeholder 0x12345678) is patched with
+ * tnl->normalptr.  The new dynfn is linked at the head of the
+ * Normal3f cache and returned.
+ */
+struct dynfn *tnl_makeX86Normal3f( TNLcontext *tnl, int key )
+{
+   static char insns[] = {
+      0xba, 0x78, 0x56, 0x34, 0x12,   /* mov $DEST,%edx */
+      0x8b, 0x44, 0x24, 0x04,         /* mov 0x4(%esp,1),%eax */
+      0x89, 0x02,                     /* mov %eax,(%edx) */
+      0x8b, 0x44, 0x24, 0x08,         /* mov 0x8(%esp,1),%eax */
+      0x89, 0x42, 0x04,               /* mov %eax,0x4(%edx) */
+      0x8b, 0x44, 0x24, 0x0c,         /* mov 0xc(%esp,1),%eax */
+      0x89, 0x42, 0x08,               /* mov %eax,0x8(%edx) */
+      0xc3,                           /* ret */
+   };
+
+   struct dynfn *fn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN) {
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   }
+
+   /* Register the entry first, then fill in its code. */
+   insert_at_head( &tnl->dfn_cache.Normal3f, fn );
+   fn->key = key;
+
+   fn->code = ALIGN_MALLOC( sizeof(insns), 16 );
+   memcpy (fn->code, insns, sizeof(insns));
+   FIXUP(fn->code, 1, 0x12345678, (int)tnl->normalptr);
+
+   return fn;
+}
+
+/* Build an x86 stub for the Color4ubv entry point.
+ *
+ * Two variants are generated depending on 'key':
+ *
+ *  - TNL_CP_VC_FRMT_PKCOLOR set: the stub reads one 32-bit word through
+ *    its pointer argument and stores it at the address held in
+ *    tnl->ubytecolorptr when this generator runs.
+ *
+ *  - otherwise: the stub reads the same 32-bit word, uses each of its four
+ *    bytes as an index (scaled by 4) into _mesa_ubyte_to_float_color_tab,
+ *    and stores the four looked-up words at tnl->floatcolorptr + 0, 4, 8
+ *    and 12 bytes.
+ *
+ * In both cases the new dynfn is linked at the head of
+ * tnl->dfn_cache.Color4ubv, tagged with 'key' and returned.  Allocation
+ * results are not checked here.
+ */
+struct dynfn *tnl_makeX86Color4ubv( TNLcontext *tnl, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   insert_at_head( &tnl->dfn_cache.Color4ubv, dfn );
+   dfn->key = key;
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if (key & TNL_CP_VC_FRMT_PKCOLOR) {
+      /* Raw opcode bytes (some >= 0x80), hence unsigned char. */
+      static const unsigned char temp[] = {
+         0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+         0xba, 0x78, 0x56, 0x34, 0x12,  /* mov $DEST,%edx */
+         0x8b, 0x00,                    /* mov (%eax),%eax */
+         0x89, 0x02,                    /* mov %eax,(%edx) */
+         0xc3,                          /* ret */
+      };
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 5, 0x12345678, (int)tnl->ubytecolorptr);
+      return dfn;
+   }
+   else {
+      /* Raw opcode bytes (some >= 0x80), hence unsigned char.  TAB and the
+       * four DEST placeholders are patched below.
+       */
+      static const unsigned char temp[] = {
+         0x53,                                /* push %ebx */
+         0xba, 0x00, 0x00, 0x00, 0x00,        /* mov $TAB,%edx */
+         0x31, 0xc0,                          /* xor %eax,%eax */
+         0x31, 0xc9,                          /* xor %ecx,%ecx */
+         0x8b, 0x5c, 0x24, 0x08,              /* mov 0x8(%esp,1), %ebx */
+         0x8b, 0x1b,                          /* mov (%ebx), %ebx */
+         0x88, 0xd8,                          /* mov %bl, %al */
+         0x88, 0xf9,                          /* mov %bh, %cl */
+         0x8b, 0x04, 0x82,                    /* mov (%edx,%eax,4),%eax */
+         0x8b, 0x0c, 0x8a,                    /* mov (%edx,%ecx,4),%ecx */
+         0xa3, 0xaf, 0xbe, 0xad, 0xde,        /* mov %eax,DEST */
+         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,  /* mov %ecx,DEST+4 */
+         0x31, 0xc0,                          /* xor %eax,%eax */
+         0x31, 0xc9,                          /* xor %ecx,%ecx */
+         0xc1, 0xeb, 0x10,                    /* shr $0x10, %ebx */
+         0x88, 0xd8,                          /* mov %bl, %al */
+         0x88, 0xf9,                          /* mov %bh, %cl */
+         0x8b, 0x04, 0x82,                    /* mov (%edx,%eax,4),%eax */
+         0x8b, 0x0c, 0x8a,                    /* mov (%edx,%ecx,4),%ecx */
+         0xa3, 0xaf, 0xbe, 0xad, 0xde,        /* mov %eax,DEST+8 */
+         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,  /* mov %ecx,DEST+12 */
+         0x5b,                                /* pop %ebx */
+         0xc3,                                /* ret */
+      };
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* The cast binds before the addition, so +4/+8/+12 are byte offsets. */
+      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab);
+      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)tnl->floatcolorptr);
+      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)tnl->floatcolorptr+4);
+      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)tnl->floatcolorptr+8);
+      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)tnl->floatcolorptr+12);
+      return dfn;
+   }
+}
+
+/* Build an x86 stub for the Color4ub entry point.
+ *
+ * Only keys with TNL_CP_VC_FRMT_PKCOLOR set are supported; for any other
+ * key nothing is allocated and 0 is returned.
+ *
+ * The generated code loads its four stack arguments and stores the low
+ * byte of each at consecutive byte addresses starting at the value of
+ * tnl->ubytecolorptr when this generator runs.
+ *
+ * On success the new dynfn is linked at the head of
+ * tnl->dfn_cache.Color4ub, tagged with 'key' and returned.  Allocation
+ * results are not checked here.
+ */
+struct dynfn *tnl_makeX86Color4ub( TNLcontext *tnl, int key )
+{
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if (key & TNL_CP_VC_FRMT_PKCOLOR) {
+      /* XXX push/pop */
+      /* Raw opcode bytes (some >= 0x80), hence unsigned char.  Argument
+       * offsets are 4 higher than usual because of the push %ebx.
+       */
+      static const unsigned char temp[] = {
+         0x53,                      /* push %ebx */
+         0x8b, 0x44, 0x24, 0x08,    /* mov 0x8(%esp,1),%eax */
+         0x8b, 0x54, 0x24, 0x0c,    /* mov 0xc(%esp,1),%edx */
+         0x8b, 0x4c, 0x24, 0x10,    /* mov 0x10(%esp,1),%ecx */
+         0x8b, 0x5c, 0x24, 0x14,    /* mov 0x14(%esp,1),%ebx */
+         0xa2, 0, 0, 0, 0,          /* mov %al,DEST */
+         0x88, 0x15, 0, 0, 0, 0,    /* mov %dl,DEST+1 */
+         0x88, 0x0d, 0, 0, 0, 0,    /* mov %cl,DEST+2 */
+         0x88, 0x1d, 0, 0, 0, 0,    /* mov %bl,DEST+3 */
+         0x5b,                      /* pop %ebx */
+         0xc3,                      /* ret */
+      };
+
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      insert_at_head( &tnl->dfn_cache.Color4ub, dfn );
+      dfn->key = key;
+
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* The cast binds before the addition, so +1/+2/+3 are byte offsets. */
+      FIXUP(dfn->code, 18, 0x0, (int)tnl->ubytecolorptr);
+      FIXUP(dfn->code, 24, 0x0, (int)tnl->ubytecolorptr+1);
+      FIXUP(dfn->code, 30, 0x0, (int)tnl->ubytecolorptr+2);
+      FIXUP(dfn->code, 36, 0x0, (int)tnl->ubytecolorptr+3);
+      return dfn;
+   }
+   else
+      return 0;
+}
+
+
+/* Build an x86 stub for the Color3fv entry point.
+ *
+ * Keys with TNL_CP_VC_FRMT_PKCOLOR or TNL_CP_VC_FRMT_FPALPHA set are not
+ * supported: nothing is allocated and 0 is returned.
+ *
+ * Otherwise the generated code loads its pointer argument from 4(%esp) and
+ * copies three 32-bit words from it to the address held in
+ * tnl->floatcolorptr when this generator runs (patched at byte offset 5).
+ *
+ * On success the new dynfn is linked at the head of
+ * tnl->dfn_cache.Color3fv, tagged with 'key' and returned.  Allocation
+ * results are not checked here.
+ */
+struct dynfn *tnl_makeX86Color3fv( TNLcontext *tnl, int key )
+{
+   if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA))
+      return 0;
+   else
+   {
+      /* Raw opcode bytes (some >= 0x80), hence unsigned char; read-only
+       * template, only the copy in dfn->code is patched.
+       */
+      static const unsigned char temp[] = {
+         0x8b, 0x44, 0x24, 0x04,    /* mov 0x4(%esp,1),%eax */
+         0xba, 0, 0, 0, 0,          /* mov $DEST,%edx */
+         0x8b, 0x08,                /* mov (%eax),%ecx */
+         0x89, 0x0a,                /* mov %ecx,(%edx) */
+         0x8b, 0x48, 0x04,          /* mov 0x4(%eax),%ecx */
+         0x89, 0x4a, 0x04,          /* mov %ecx,0x4(%edx) */
+         0x8b, 0x48, 0x08,          /* mov 0x8(%eax),%ecx */
+         0x89, 0x4a, 0x08,          /* mov %ecx,0x8(%edx) */
+         0xc3,                      /* ret */
+      };
+
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+      if (TNL_DEBUG & DEBUG_CODEGEN)
+         fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+      insert_at_head( &tnl->dfn_cache.Color3fv, dfn );
+      dfn->key = key;
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 5, 0x0, (int)tnl->floatcolorptr);
+      return dfn;
+   }
+}
+
+/* Build an x86 stub for the Color3f entry point.
+ *
+ * Keys with TNL_CP_VC_FRMT_PKCOLOR or TNL_CP_VC_FRMT_FPALPHA set are not
+ * supported: nothing is allocated and 0 is returned.
+ *
+ * Otherwise the generated code copies the three 32-bit stack arguments at
+ * 4, 8 and 12(%esp) to the address held in tnl->floatcolorptr when this
+ * generator runs (patched at byte offset 1).
+ *
+ * On success the new dynfn is linked at the head of
+ * tnl->dfn_cache.Color3f, tagged with 'key' and returned.  Allocation
+ * results are not checked here.
+ */
+struct dynfn *tnl_makeX86Color3f( TNLcontext *tnl, int key )
+{
+   if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA))
+      return 0;
+   else
+   {
+      /* Raw opcode bytes (some >= 0x80), hence unsigned char; read-only
+       * template, only the copy in dfn->code is patched.
+       */
+      static const unsigned char temp[] = {
+         0xba, 0x78, 0x56, 0x34, 0x12,  /* mov $DEST,%edx */
+         0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+         0x89, 0x02,                    /* mov %eax,(%edx) */
+         0x8b, 0x44, 0x24, 0x08,        /* mov 0x8(%esp,1),%eax */
+         0x89, 0x42, 0x04,              /* mov %eax,0x4(%edx) */
+         0x8b, 0x44, 0x24, 0x0c,        /* mov 0xc(%esp,1),%eax */
+         0x89, 0x42, 0x08,              /* mov %eax,0x8(%edx) */
+         0xc3,                          /* ret */
+      };
+
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+      if (TNL_DEBUG & DEBUG_CODEGEN)
+         fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+      insert_at_head( &tnl->dfn_cache.Color3f, dfn );
+      dfn->key = key;
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 1, 0x12345678, (int)tnl->floatcolorptr);
+      return dfn;
+   }
+}
+
+
+
+/* Build an x86 stub for the TexCoord2fv entry point.
+ *
+ * The generated code loads its pointer argument from 4(%esp) and copies
+ * two 32-bit words from it to the address held in tnl->texcoordptr[0] when
+ * this generator runs (patched at byte offset 5).
+ *
+ * The new dynfn is linked at the head of tnl->dfn_cache.TexCoord2fv,
+ * tagged with 'key' and returned.  Allocation results are not checked
+ * here.
+ */
+struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *tnl, int key )
+{
+   /* Raw opcode bytes (some >= 0x80), hence unsigned char; read-only
+    * template, only the copy in dfn->code is patched.
+    */
+   static const unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0xba, 0x78, 0x56, 0x34, 0x12,  /* mov $DEST,%edx */
+      0x8b, 0x08,                    /* mov (%eax),%ecx */
+      0x8b, 0x40, 0x04,              /* mov 0x4(%eax),%eax */
+      0x89, 0x0a,                    /* mov %ecx,(%edx) */
+      0x89, 0x42, 0x04,              /* mov %eax,0x4(%edx) */
+      0xc3,                          /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.TexCoord2fv, dfn );
+   dfn->key = key;
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   FIXUP(dfn->code, 5, 0x12345678, (int)tnl->texcoordptr[0]);
+   return dfn;
+}
+
+/* Build an x86 stub for the TexCoord2f entry point.
+ *
+ * The generated code copies the two 32-bit stack arguments at 4 and
+ * 8(%esp) to the address held in tnl->texcoordptr[0] when this generator
+ * runs (patched at byte offset 1).
+ *
+ * The new dynfn is linked at the head of tnl->dfn_cache.TexCoord2f, tagged
+ * with 'key' and returned.  Allocation results are not checked here.
+ */
+struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *tnl, int key )
+{
+   /* Raw opcode bytes (some >= 0x80), hence unsigned char; read-only
+    * template, only the copy in dfn->code is patched.
+    */
+   static const unsigned char temp[] = {
+      0xba, 0x78, 0x56, 0x34, 0x12,  /* mov $DEST,%edx */
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,        /* mov 0x8(%esp,1),%ecx */
+      0x89, 0x02,                    /* mov %eax,(%edx) */
+      0x89, 0x4a, 0x04,              /* mov %ecx,0x4(%edx) */
+      0xc3,                          /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.TexCoord2f, dfn );
+   dfn->key = key;
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   FIXUP(dfn->code, 1, 0x12345678, (int)tnl->texcoordptr[0]);
+   return dfn;
+}
+
+/* Build an x86 stub for the MultiTexCoord2fvARB entry point.
+ *
+ * The generated code takes the texture target at 4(%esp) and a pointer at
+ * 8(%esp).  It reduces the target to a unit index with
+ * (target - 0x84c0) & 1 and then stores the two 32-bit words read through
+ * the pointer.  Two variants exist:
+ *
+ *  - 'key' has both TNL_CP_VC_FRMT_ST0 and TNL_CP_VC_FRMT_ST1 set: the
+ *    destination is computed as tnl->texcoordptr[0] + unit*8 bytes.
+ *    NOTE(review): this presumably relies on unit 1's coordinates living
+ *    8 bytes past unit 0's in that vertex format -- confirm.
+ *
+ *  - otherwise: the destination pointer is loaded at run time from
+ *    tnl->texcoordptr indexed by the unit.
+ *
+ * The new dynfn is linked at the head of
+ * tnl->dfn_cache.MultiTexCoord2fvARB, tagged with 'key' and returned.
+ * Allocation results are not checked here.
+ */
+struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *tnl, int key )
+{
+   /* Raw opcode bytes (some >= 0x80), hence unsigned char.  Both
+    * templates are read-only; only the copy in dfn->code is patched.
+    */
+   static const unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,        /* mov 0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,  /* sub $0x84c0,%eax */
+      0x83, 0xe0, 0x01,              /* and $0x1,%eax */
+      0x8b, 0x11,                    /* mov (%ecx),%edx */
+      0xc1, 0xe0, 0x03,              /* shl $0x3,%eax */
+      0x8b, 0x49, 0x04,              /* mov 0x4(%ecx),%ecx */
+      0x89, 0x90, 0, 0, 0, 0,        /* mov %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,        /* mov %ecx,DEST+4(%eax) */
+      0xc3,                          /* ret */
+   };
+
+   static const unsigned char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,        /* mov 0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,  /* sub $0x84c0,%eax */
+      0x83, 0xe0, 0x01,              /* and $0x1,%eax */
+      0x8b, 0x14, 0x85, 0, 0, 0, 0,  /* mov DEST(,%eax,4),%edx */
+      0x8b, 0x01,                    /* mov (%ecx),%eax */
+      0x89, 0x02,                    /* mov %eax,(%edx) */
+      0x8b, 0x41, 0x04,              /* mov 0x4(%ecx),%eax */
+      0x89, 0x42, 0x04,              /* mov %eax,0x4(%edx) */
+      0xc3,                          /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fvARB, dfn );
+   dfn->key = key;
+
+   if ((key & (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) ==
+       (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) {
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* The cast binds before the addition: second word goes 4 bytes on. */
+      FIXUP(dfn->code, 26, 0x0, (int)tnl->texcoordptr[0]);
+      FIXUP(dfn->code, 32, 0x0, (int)tnl->texcoordptr[0]+4);
+   } else {
+      dfn->code = ALIGN_MALLOC( sizeof(temp2), 16 );
+      memcpy (dfn->code, temp2, sizeof(temp2));
+      /* Here DEST is tnl->texcoordptr itself, not one of its elements. */
+      FIXUP(dfn->code, 19, 0x0, (int)tnl->texcoordptr);
+   }
+   return dfn;
+}
+
+/* Build an x86 stub for the MultiTexCoord2fARB entry point.
+ *
+ * The generated code takes the texture target at 4(%esp) and two 32-bit
+ * values at 8 and 12(%esp).  It reduces the target to a unit index with
+ * (target - 0x84c0) & 1 and stores the two values.  Two variants exist:
+ *
+ *  - 'key' has both TNL_CP_VC_FRMT_ST0 and TNL_CP_VC_FRMT_ST1 set: the
+ *    destination is computed as tnl->texcoordptr[0] + unit*8 bytes.
+ *    NOTE(review): this presumably relies on unit 1's coordinates living
+ *    8 bytes past unit 0's in that vertex format -- confirm.
+ *
+ *  - otherwise: the destination pointer is loaded at run time from
+ *    tnl->texcoordptr indexed by the unit.
+ *
+ * The new dynfn is linked at the head of
+ * tnl->dfn_cache.MultiTexCoord2fARB, tagged with 'key' and returned.
+ * Allocation results are not checked here.
+ */
+struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *tnl,
+                                             int key )
+{
+   /* Raw opcode bytes (some >= 0x80), hence unsigned char.  Both
+    * templates are read-only; only the copy in dfn->code is patched.
+    */
+   static const unsigned char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,        /* mov 0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,  /* sub $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,        /* mov 0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,              /* and $0x1,%eax */
+      0xc1, 0xe0, 0x03,              /* shl $0x3,%eax */
+      0x89, 0x90, 0, 0, 0, 0,        /* mov %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,        /* mov %ecx,DEST+4(%eax) */
+      0xc3,                          /* ret */
+   };
+
+   static const unsigned char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,        /* mov 0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,        /* mov 0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,  /* sub $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,        /* mov 0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,              /* and $0x1,%eax */
+      0x8b, 0x04, 0x85, 0, 0, 0, 0,  /* mov DEST(,%eax,4),%eax */
+      0x89, 0x10,                    /* mov %edx,(%eax) */
+      0x89, 0x48, 0x04,              /* mov %ecx,0x4(%eax) */
+      0xc3,                          /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fARB, dfn );
+   dfn->key = key;
+
+   if ((key & (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) ==
+       (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) {
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* The cast binds before the addition: second word goes 4 bytes on. */
+      FIXUP(dfn->code, 25, 0x0, (int)tnl->texcoordptr[0]);
+      FIXUP(dfn->code, 31, 0x0, (int)tnl->texcoordptr[0]+4);
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      dfn->code = ALIGN_MALLOC( sizeof(temp2), 16 );
+      memcpy (dfn->code, temp2, sizeof(temp2));
+      /* Here DEST is tnl->texcoordptr itself, not one of its elements. */
+      FIXUP(dfn->code, 23, 0x0, (int)tnl->texcoordptr);
+   }
+   return dfn;
+}
+
+
+/* Install the x86 code generators defined in this file into 'gen'.
+ *
+ * Only the slots listed below are written; every other member of 'gen' is
+ * left as the caller set it.
+ */
+void _tnl_InitX86Codegen( struct dfn_generators *gen )
+{
+   /* Position. */
+   gen->Vertex3fv = tnl_makeX86Vertex3fv;
+   gen->Vertex3f = tnl_makeX86Vertex3f;
+
+   /* Normal. */
+   gen->Normal3fv = tnl_makeX86Normal3fv;
+   gen->Normal3f = tnl_makeX86Normal3f;
+
+   /* Color.  The Color4ub generator yields code only for PKCOLOR keys and
+    * returns 0 otherwise; Color4ubv covers both the packed and the float
+    * color layout.  Color3f/Color3fv return 0 for PKCOLOR or FPALPHA keys.
+    */
+   gen->Color4ubv = tnl_makeX86Color4ubv;
+   gen->Color4ub = tnl_makeX86Color4ub;
+   gen->Color3fv = tnl_makeX86Color3fv;
+   gen->Color3f = tnl_makeX86Color3f;
+
+   /* Texture coordinates. */
+   gen->TexCoord2fv = tnl_makeX86TexCoord2fv;
+   gen->TexCoord2f = tnl_makeX86TexCoord2f;
+   gen->MultiTexCoord2fvARB = tnl_makeX86MultiTexCoord2fvARB;
+   gen->MultiTexCoord2fARB = tnl_makeX86MultiTexCoord2fARB;
+
+   /* Not done -- no x86 generator exists yet for these slots:
+    *    Vertex2f, Vertex2fv,
+    *    Color3ub, Color3ubv, Color4f, Color4fv,
+    *    TexCoord1f, TexCoord1fv,
+    *    MultiTexCoord1fARB, MultiTexCoord1fvARB
+    */
+}
+
+
+#else
+
+/* Fallback used when the x86 code generators above are compiled out:
+ * installs nothing and leaves 'gen' untouched.
+ */
+void _tnl_InitX86Codegen( struct dfn_generators *gen )
+{
+ (void) gen;
+}
+
+#endif