From c8100a02d28c8a424f69723778abebd950914bc6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Mon, 29 Mar 2004 11:05:02 +0000
Subject: First round of codegen for t_vtx_api.c -- ie the Begin/Vertex/End
 code. Enable with env var:  MESA_CODEGEN=t.

---
 src/mesa/sources             |   5 +-
 src/mesa/tnl/t_context.c     |   4 +
 src/mesa/tnl/t_context.h     |  28 +-
 src/mesa/tnl/t_vtx_api.c     | 749 ++++++++++++++-----------------------------
 src/mesa/tnl/t_vtx_api.h     |  26 ++
 src/mesa/tnl/t_vtx_x86_gcc.S | 327 +++++++++++--------
 6 files changed, 496 insertions(+), 643 deletions(-)

(limited to 'src')

diff --git a/src/mesa/sources b/src/mesa/sources
index aeb8559fa2..21727aa574 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -132,6 +132,8 @@ TNL_SOURCES = \
 	tnl/t_vb_vertex.c \
 	tnl/t_vertex.c \
 	tnl/t_vtx_api.c \
+	tnl/t_vtx_generic.c \
+	tnl/t_vtx_x86.c \
 	tnl/t_vtx_eval.c \
 	tnl/t_vtx_exec.c 
 
@@ -159,7 +161,8 @@ X86_SOURCES =			\
 	x86/sse_xform2.S	\
 	x86/sse_xform3.S	\
 	x86/sse_xform4.S	\
-	x86/sse_normal.S
+	x86/sse_normal.S \
+	tnl/t_vtx_x86_gcc.S
 
 SPARC_SOURCES =			\
 	sparc/clip.S		\
diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
index 2c529afd79..330c19d649 100644
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -121,6 +121,10 @@ _tnl_CreateContext( GLcontext *ctx )
    tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
    tnl->Driver.NotifyMaterialChange = _mesa_validate_all_lighting_tables;
    
+
+   if (getenv("MESA_CODEGEN"))
+      tnl->AllowCodegen = GL_TRUE;
+
    return GL_TRUE;
 }
 
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index 1293db3bf3..ec5675faf9 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -248,10 +248,28 @@ struct tnl_copied_vtx {
 
 #define VERT_BUFFER_SIZE 2048	/* 8kbytes */
 
-#define ERROR_ATTRIB _TNL_ATTRIB_MAX /* error path for t_vtx_api.c */
 
 typedef void (*attrfv_func)( const GLfloat * );
 
+struct dynfn {			/* one cached, generated entrypoint */
+   struct dynfn *next, *prev;	/* list links (simple_list.h macros) */
+   int key;			/* value the code was specialized for */
+   char *code;			/* generated code; called as an attrfv_func */
+};
+
+struct dynfn_lists {		/* list heads, indexed by component count - 1 */
+   struct dynfn Vertex[4];	/* attribute 0; looked up by vertex_size */
+   struct dynfn Attribute[4];	/* other attributes; looked up by attrptr */
+};
+
+struct dynfn_generators {	/* codegen hooks, indexed by component count - 1 */
+   struct dynfn *(*Vertex[4])( GLcontext *ctx, int key );
+   struct dynfn *(*Attribute[4])( GLcontext *ctx, int key );
+};				/* a hook returns 0 when it generates nothing */
+
+#define _TNL_MAX_ATTR_CODEGEN 16 
+
+
 /* The assembly of vertices in immediate mode is separated from
  * display list compilation.  This allows a simpler immediate mode
  * treatment and a display list compiler better suited to
@@ -269,7 +287,12 @@ struct tnl_vtx {
    GLfloat *current[_TNL_ATTRIB_MAX]; /* points into ctx->Current, etc */
    GLuint counter, initial_counter;
    struct tnl_copied_vtx copied;
-   attrfv_func tabfv[_TNL_ATTRIB_MAX+1][4]; /* +1 for ERROR_ATTRIB */
+
+   attrfv_func tabfv[_TNL_MAX_ATTR_CODEGEN+1][4]; /* plus 1 for ERROR_ATTRIB */
+
+   struct dynfn_lists cache;
+   struct dynfn_generators gen;
+
    struct tnl_eval eval;
    GLboolean *edgeflag_tmp;
    GLboolean have_materials;
@@ -714,6 +737,7 @@ typedef struct
    GLboolean IsolateMaterials;
    GLboolean AllowVertexFog;
    GLboolean AllowPixelFog;
+   GLboolean AllowCodegen;
 
    GLboolean _DoVertexFog;  /* eval fog function at each vertex? */
 
diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c
index 4f07e2cc7e..1756617f5c 100644
--- a/src/mesa/tnl/t_vtx_api.c
+++ b/src/mesa/tnl/t_vtx_api.c
@@ -41,9 +41,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "api_arrayelt.h"
 #include "api_noop.h"
 #include "t_vtx_api.h"
+#include "simple_list.h"
 
+static void reset_attrfv( TNLcontext *tnl );
 
-static void init_attrfv( TNLcontext *tnl );
+static attrfv_func choose[_TNL_MAX_ATTR_CODEGEN+1][4]; /* +1 for ERROR_ATTRIB */
+static attrfv_func generic_attr_func[_TNL_MAX_ATTR_CODEGEN][4];
 
 
 /* Close off the last primitive, execute the buffer, restart the
@@ -100,7 +103,7 @@ static void _tnl_wrap_buffers( GLcontext *ctx )
 /* Deal with buffer wrapping where provoked by the vertex buffer
  * filling up, as opposed to upgrade_vertex().
  */
-static void _tnl_wrap_filled_vertex( GLcontext *ctx )
+void _tnl_wrap_filled_vertex( GLcontext *ctx )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLfloat *data = tnl->vtx.copied.buffer;
@@ -156,7 +159,8 @@ static void _tnl_copy_to_current( GLcontext *ctx )
    /* Colormaterial -- this kindof sucks.
     */
    if (ctx->Light.ColorMaterialEnabled) {
-      _mesa_update_color_material(ctx, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
+      _mesa_update_color_material(ctx, 
+				  ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
    }
 
    if (tnl->vtx.have_materials) {
@@ -204,7 +208,6 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx,
    GLfloat *tmp;
    GLint lastcount = tnl->vtx.initial_counter - tnl->vtx.counter;
 
-
    /* Run pipeline on current vertices, copy wrapped vertices
     * to tnl->vtx.copied.
     */
@@ -222,10 +225,10 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx,
     * begin/end so that they don't bloat the vertices.
     */
    if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
-       tnl->vtx.attrsz[attr] == 0 
-       && lastcount > 8
-      ) {
-      init_attrfv( tnl );
+       tnl->vtx.attrsz[attr] == 0 && 
+       lastcount > 8 &&
+       tnl->vtx.vertex_size) {
+      reset_attrfv( tnl );
    }
 
    /* Fix up sizes:
@@ -289,6 +292,19 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx,
       tnl->vtx.counter -= tnl->vtx.copied.nr;
       tnl->vtx.copied.nr = 0;
    }
+
+   /* For codegen: the attrptrs may have changed, so reinstall the
+    * choosers to force codegen to be redone.  This might also be a
+    * reasonable place to detect attributes no longer being submitted.
+    */
+   for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) 
+      if (tnl->vtx.attrsz[i]) {
+	 GLuint j = tnl->vtx.attrsz[i] - 1;
+
+	 if (i < _TNL_MAX_ATTR_CODEGEN)
+	    tnl->vtx.tabfv[i][j] = choose[i][j];
+      }
+
 }
 
 
@@ -314,146 +330,131 @@ static void _tnl_fixup_vertex( GLcontext *ctx, GLuint attr, GLuint sz )
 }
 
 
+/* Walk list 'l' and return the entry whose key matches, or NULL if none. */
+static struct dynfn *lookup( struct dynfn *l, GLuint key )
+{
+   struct dynfn *entry;
+
+   foreach( entry, l ) {
+      if ((GLuint) entry->key == key)
+	 return entry;
+   }
+   return NULL;
+}
+
+/* Find cached code for (attr, sz), generating it on a miss; 0 if neither. */
+static attrfv_func do_codegen( GLcontext *ctx, GLuint attr, GLuint sz )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx); 
+   struct dynfn *dfn = 0;
 
+   if (attr == 0) {
+      GLuint key = tnl->vtx.vertex_size;
+      /* Attribute 0 uses the Vertex cache, keyed on the vertex size. */
+      dfn = lookup( &tnl->vtx.cache.Vertex[sz-1], key );
+      /* Cache miss: ask the generator hook for this size. */
+      if (!dfn)
+	 dfn = tnl->vtx.gen.Vertex[sz-1]( ctx, key );
+   }
+   else {
+      GLuint key = (GLuint) tnl->vtx.attrptr[attr];
+      /* NOTE(review): pointer narrowed to GLuint -- confirm 32-bit only. */
+      dfn = lookup( &tnl->vtx.cache.Attribute[sz-1], key );
+      /* Cache miss: ask the generator hook for this size. */
+      if (!dfn)
+	 dfn = tnl->vtx.gen.Attribute[sz-1]( ctx, key );
+   }
+
+   if (dfn) 
+      return (attrfv_func) dfn->code;
+   else
+      return 0;
+}
 
 /* Helper function for 'CHOOSE' macro.  Do what's necessary when an
  * entrypoint is called for the first time.
  */
-static void do_choose( GLuint attr, GLuint sz, 
-			void (*fallback_attr_func)( const GLfloat *),
-			void (*choose1)( const GLfloat *),
-			void (*choose2)( const GLfloat *),
-			void (*choose3)( const GLfloat *),
-			void (*choose4)( const GLfloat *),
-			const GLfloat *v )
+
+static attrfv_func do_choose( GLuint attr, GLuint sz )
 { 
    GET_CURRENT_CONTEXT( ctx ); 
    TNLcontext *tnl = TNL_CONTEXT(ctx); 
+   GLuint oldsz = tnl->vtx.attrsz[attr];
+
+   assert(attr < _TNL_MAX_ATTR_CODEGEN);
 
-   if (tnl->vtx.attrsz[attr] != sz)
+   if (oldsz != sz) {
+      /* Reset any active pointers for this attribute 
+       */
+      if (oldsz)
+	 tnl->vtx.tabfv[attr][oldsz-1] = choose[attr][oldsz-1];
+   
       _tnl_fixup_vertex( ctx, attr, sz );
  
-   /* Does this belong here?  Necessitates resetting vtxfmt on each
-    * flush (otherwise flags won't get reset afterwards).
-    */
-   if (attr == 0)
-      ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-   else
-      ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
+      /* Does setting NeedFlush belong here?  Necessitates resetting
+       * vtxfmt on each flush (otherwise flags won't get reset
+       * afterwards).
+       */
+      if (attr == 0) 
+	 ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+      else 
+	 ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
+   }
 
-   /* Reset any active pointers for this attribute 
-    */
-   tnl->vtx.tabfv[attr][0] = choose1;
-   tnl->vtx.tabfv[attr][1] = choose2;
-   tnl->vtx.tabfv[attr][2] = choose3;
-   tnl->vtx.tabfv[attr][3] = choose4;
 
-   /* Update the secondary dispatch table with the new function
+   /* Try to use codegen:
+    */   
+   if (tnl->AllowCodegen)
+      tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz );
+   else
+      tnl->vtx.tabfv[attr][sz-1] = 0;
+
+   /* Else use generic version:
     */
-   tnl->vtx.tabfv[attr][sz-1] = fallback_attr_func;
+   if (!tnl->vtx.tabfv[attr][sz-1])
+      tnl->vtx.tabfv[attr][sz-1] = generic_attr_func[attr][sz-1];
 
-   (*fallback_attr_func)(v);
+   return tnl->vtx.tabfv[attr][sz-1];
 }
 
 
-/* Versions of all the entrypoints for situations where codegen isn't
- * available.  
- *
- * Note: Only one size for each attribute may be active at once.
- * Eg. if Color3f is installed/active, then Color4f may not be, even
- * if the vertex actually contains 4 color coordinates.  This is
- * because the 3f version won't otherwise set color[3] to 1.0 -- this
- * is the job of the chooser function when switching between Color4f
- * and Color3f.
- */
-#define ATTRFV( ATTR, N )				\
-static void choose_##ATTR##_##N( const GLfloat *v );	\
-							\
-static void attrib_##ATTR##_##N( const GLfloat *v )	\
-{							\
-   GET_CURRENT_CONTEXT( ctx );				\
-   TNLcontext *tnl = TNL_CONTEXT(ctx);			\
-							\
-   if ((ATTR) == 0) {					\
-      GLuint i;						\
-							\
-      if (N>0) tnl->vtx.vbptr[0] = v[0];		\
-      if (N>1) tnl->vtx.vbptr[1] = v[1];		\
-      if (N>2) tnl->vtx.vbptr[2] = v[2];		\
-      if (N>3) tnl->vtx.vbptr[3] = v[3];		\
-							\
-      for (i = N; i < tnl->vtx.vertex_size; i++)	\
-	 tnl->vtx.vbptr[i] = tnl->vtx.vertex[i];	\
-							\
-      tnl->vtx.vbptr += tnl->vtx.vertex_size;		\
-							\
-      if (--tnl->vtx.counter == 0)			\
-	 _tnl_wrap_filled_vertex( ctx );		\
-   }							\
-   else {						\
-      GLfloat *dest = tnl->vtx.attrptr[ATTR];		\
-      if (N>0) dest[0] = v[0];				\
-      if (N>1) dest[1] = v[1];				\
-      if (N>2) dest[2] = v[2];				\
-      if (N>3) dest[3] = v[3];				\
-   }							\
-}
 
 #define CHOOSE( ATTR, N )				\
 static void choose_##ATTR##_##N( const GLfloat *v )	\
 {							\
-   do_choose(ATTR, N,					\
-	     attrib_##ATTR##_##N,			\
-	     choose_##ATTR##_1,				\
-	     choose_##ATTR##_2,				\
-	     choose_##ATTR##_3,				\
-	     choose_##ATTR##_4,				\
-	     v );					\
-}
-
-#define INIT(ATTR)					\
-static void init_##ATTR( TNLcontext *tnl )		\
-{							\
-   tnl->vtx.tabfv[ATTR][0] = choose_##ATTR##_1;		\
-   tnl->vtx.tabfv[ATTR][1] = choose_##ATTR##_2;		\
-   tnl->vtx.tabfv[ATTR][2] = choose_##ATTR##_3; 	\
-   tnl->vtx.tabfv[ATTR][3] = choose_##ATTR##_4;		\
+   attrfv_func f = do_choose(ATTR, N);			\
+   f( v );						\
 }
-   
 
-#define ATTRS( ATTRIB )				\
-   ATTRFV( ATTRIB, 1 )				\
-   ATTRFV( ATTRIB, 2 )				\
-   ATTRFV( ATTRIB, 3 )				\
-   ATTRFV( ATTRIB, 4 )				\
+#define CHOOSERS( ATTRIB ) \
    CHOOSE( ATTRIB, 1 )				\
    CHOOSE( ATTRIB, 2 )				\
    CHOOSE( ATTRIB, 3 )				\
    CHOOSE( ATTRIB, 4 )				\
-   INIT( ATTRIB )				\
 
 
-/* Generate a lot of functions.  These are the actual worker
- * functions, which are equivalent to those generated via codegen
- * elsewhere.
- */
-ATTRS( 0 )
-ATTRS( 1 )
-ATTRS( 2 )
-ATTRS( 3 )
-ATTRS( 4 )
-ATTRS( 5 )
-ATTRS( 6 )
-ATTRS( 7 )
-ATTRS( 8 )
-ATTRS( 9 )
-ATTRS( 10 )
-ATTRS( 11 )
-ATTRS( 12 )
-ATTRS( 13 )
-ATTRS( 14 )
-ATTRS( 15 )
-
+#define INIT_CHOOSERS(ATTR)				\
+   choose[ATTR][0] = choose_##ATTR##_1;				\
+   choose[ATTR][1] = choose_##ATTR##_2;				\
+   choose[ATTR][2] = choose_##ATTR##_3;				\
+   choose[ATTR][3] = choose_##ATTR##_4;
+
+CHOOSERS( 0 )
+CHOOSERS( 1 )
+CHOOSERS( 2 )
+CHOOSERS( 3 )
+CHOOSERS( 4 )
+CHOOSERS( 5 )
+CHOOSERS( 6 )
+CHOOSERS( 7 )
+CHOOSERS( 8 )
+CHOOSERS( 9 )
+CHOOSERS( 10 )
+CHOOSERS( 11 )
+CHOOSERS( 12 )
+CHOOSERS( 13 )
+CHOOSERS( 14 )
+CHOOSERS( 15 )
 
 static void error_attrib( const GLfloat *unused )
 {
@@ -462,326 +463,25 @@ static void error_attrib( const GLfloat *unused )
    _mesa_error( ctx, GL_INVALID_ENUM, "glVertexAttrib" );
 }   
 
-static void init_error_attrib( TNLcontext *tnl )
-{
-   tnl->vtx.tabfv[ERROR_ATTRIB][0] = error_attrib;
-   tnl->vtx.tabfv[ERROR_ATTRIB][1] = error_attrib;
-   tnl->vtx.tabfv[ERROR_ATTRIB][2] = error_attrib;
-   tnl->vtx.tabfv[ERROR_ATTRIB][3] = error_attrib;
-}
-
 
 
-static void init_attrfv( TNLcontext *tnl )
+static void reset_attrfv( TNLcontext *tnl )
 {   
-   if (tnl->vtx.vertex_size) {
-      GLuint i;
-      
-      init_0( tnl );
-      init_1( tnl );
-      init_2( tnl );
-      init_3( tnl );
-      init_4( tnl );
-      init_5( tnl );
-      init_6( tnl );
-      init_7( tnl );
-      init_8( tnl );
-      init_9( tnl );
-      init_10( tnl );
-      init_11( tnl );
-      init_12( tnl );
-      init_13( tnl );
-      init_14( tnl );
-      init_15( tnl );
-      init_error_attrib( tnl );
-
-      for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) 
-	 tnl->vtx.attrsz[i] = 0;
-
-      tnl->vtx.vertex_size = 0;
-      tnl->vtx.have_materials = 0;
-   }
-}
-
-/* These can be made efficient with codegen.  Further, by adding more
- * logic to do_choose(), the double-dispatch for legacy entrypoints
- * like glVertex3f() can be removed.
- */
-#define DISPATCH_ATTRFV( ATTR, COUNT, P )	\
-do {						\
-   GET_CURRENT_CONTEXT( ctx ); 			\
-   TNLcontext *tnl = TNL_CONTEXT(ctx); 		\
-   tnl->vtx.tabfv[ATTR][COUNT-1]( P );		\
-} while (0)
-
-#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V )
-#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V )
-#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V )
-#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V )
-
-#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) )
-
-#define DISPATCH_ATTR2F( ATTR, S,T ) 		\
-do { 						\
-   GLfloat v[2]; 				\
-   v[0] = S; v[1] = T;				\
-   DISPATCH_ATTR2FV( ATTR, v );			\
-} while (0)
-#define DISPATCH_ATTR3F( ATTR, S,T,R ) 		\
-do { 						\
-   GLfloat v[3]; 				\
-   v[0] = S; v[1] = T; v[2] = R;		\
-   DISPATCH_ATTR3FV( ATTR, v );			\
-} while (0)
-#define DISPATCH_ATTR4F( ATTR, S,T,R,Q )	\
-do { 						\
-   GLfloat v[4]; 				\
-   v[0] = S; v[1] = T; v[2] = R; v[3] = Q;	\
-   DISPATCH_ATTR4FV( ATTR, v );			\
-} while (0)
-
-
-static void enum_error( void )
-{
-   GET_CURRENT_CONTEXT( ctx );
-   _mesa_error( ctx, GL_INVALID_ENUM, "glVertexAttrib" );
-}
-
-static void GLAPIENTRY _tnl_Vertex2f( GLfloat x, GLfloat y )
-{
-   DISPATCH_ATTR2F( _TNL_ATTRIB_POS, x, y );
-}
-
-static void GLAPIENTRY _tnl_Vertex2fv( const GLfloat *v )
-{
-   DISPATCH_ATTR2FV( _TNL_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY _tnl_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( _TNL_ATTRIB_POS, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_Vertex3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( _TNL_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY _tnl_Vertex4f( GLfloat x, GLfloat y, GLfloat z, 
-				      GLfloat w )
-{
-   DISPATCH_ATTR4F( _TNL_ATTRIB_POS, x, y, z, w );
-}
-
-static void GLAPIENTRY _tnl_Vertex4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( _TNL_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY _tnl_TexCoord1f( GLfloat x )
-{
-   DISPATCH_ATTR1F( _TNL_ATTRIB_TEX0, x );
-}
-
-static void GLAPIENTRY _tnl_TexCoord1fv( const GLfloat *v )
-{
-   DISPATCH_ATTR1FV( _TNL_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY _tnl_TexCoord2f( GLfloat x, GLfloat y )
-{
-   DISPATCH_ATTR2F( _TNL_ATTRIB_TEX0, x, y );
-}
-
-static void GLAPIENTRY _tnl_TexCoord2fv( const GLfloat *v )
-{
-   DISPATCH_ATTR2FV( _TNL_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY _tnl_TexCoord3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( _TNL_ATTRIB_TEX0, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_TexCoord3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( _TNL_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY _tnl_TexCoord4f( GLfloat x, GLfloat y, GLfloat z,
-					GLfloat w )
-{
-   DISPATCH_ATTR4F( _TNL_ATTRIB_TEX0, x, y, z, w );
-}
-
-static void GLAPIENTRY _tnl_TexCoord4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( _TNL_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY _tnl_Normal3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( _TNL_ATTRIB_NORMAL, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_Normal3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( _TNL_ATTRIB_NORMAL, v );
-}
-
-static void GLAPIENTRY _tnl_FogCoordfEXT( GLfloat x )
-{
-   DISPATCH_ATTR1F( _TNL_ATTRIB_FOG, x );
-}
-
-static void GLAPIENTRY _tnl_FogCoordfvEXT( const GLfloat *v )
-{
-   DISPATCH_ATTR1FV( _TNL_ATTRIB_FOG, v );
-}
-
-static void GLAPIENTRY _tnl_Color3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( _TNL_ATTRIB_COLOR0, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_Color3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( _TNL_ATTRIB_COLOR0, v );
-}
-
-static void GLAPIENTRY _tnl_Color4f( GLfloat x, GLfloat y, GLfloat z, 
-				     GLfloat w )
-{
-   DISPATCH_ATTR4F( _TNL_ATTRIB_COLOR0, x, y, z, w );
-}
-
-static void GLAPIENTRY _tnl_Color4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( _TNL_ATTRIB_COLOR0, v );
-}
-
-static void GLAPIENTRY _tnl_SecondaryColor3fEXT( GLfloat x, GLfloat y, 
-						 GLfloat z )
-{
-   DISPATCH_ATTR3F( _TNL_ATTRIB_COLOR1, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_SecondaryColor3fvEXT( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( _TNL_ATTRIB_COLOR1, v );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord1f( GLenum target, GLfloat x  )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR1F( attr, x );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord1fv( GLenum target,
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR1FV( attr, v );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord2f( GLenum target, GLfloat x, 
-					     GLfloat y )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR2F( attr, x, y );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord2fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR2FV( attr, v );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord3f( GLenum target, GLfloat x, 
-					     GLfloat y, GLfloat z)
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR3F( attr, x, y, z );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord3fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR3FV( attr, v );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord4f( GLenum target, GLfloat x, 
-					     GLfloat y, GLfloat z,
-					     GLfloat w )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR4F( attr, x, y, z, w );
-}
-
-static void GLAPIENTRY _tnl_MultiTexCoord4fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0;
-   DISPATCH_ATTR4FV( attr, v );
-}
-
-static void GLAPIENTRY _tnl_VertexAttrib1fNV( GLuint index, GLfloat x )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1F( index, x );
-}
-
-static void GLAPIENTRY _tnl_VertexAttrib1fvNV( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1FV( index, v );
-}
-
-static void GLAPIENTRY _tnl_VertexAttrib2fNV( GLuint index, GLfloat x, 
-					      GLfloat y )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2F( index, x, y );
-}
-
-static void GLAPIENTRY _tnl_VertexAttrib2fvNV( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2FV( index, v );
-}
-
-static void GLAPIENTRY _tnl_VertexAttrib3fNV( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3F( index, x, y, z );
-}
+   GLuint i;
 
-static void GLAPIENTRY _tnl_VertexAttrib3fvNV( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3FV( index, v );
-}
+   for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) 
+      if (tnl->vtx.attrsz[i]) {
+	 GLuint j = tnl->vtx.attrsz[i] - 1;
+	 tnl->vtx.attrsz[i] = 0;
 
-static void GLAPIENTRY _tnl_VertexAttrib4fNV( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z,
-					      GLfloat w )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4F( index, x, y, z, w );
-}
+	 if (i < _TNL_MAX_ATTR_CODEGEN)
+	    tnl->vtx.tabfv[i][j] = choose[i][j];
+      }
 
-static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4FV( index, v );
+   tnl->vtx.vertex_size = 0;
+   tnl->vtx.have_materials = 0;
 }
+      
 
 
 /* Materials:  
@@ -797,35 +497,33 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index,
  *
  * There is no aliasing of material attributes with other entrypoints.
  */
-#define MAT_ATTR( A, N, params )			\
+#define OTHER_ATTR( A, N, params )		\
 do {							\
    if (tnl->vtx.attrsz[A] != N) {			\
       _tnl_fixup_vertex( ctx, A, N );			\
-      tnl->vtx.have_materials = GL_TRUE;		\
    }							\
 							\
    {							\
       GLfloat *dest = tnl->vtx.attrptr[A];		\
-      if (N>0) dest[0] = params[0];			\
-      if (N>1) dest[1] = params[1];			\
-      if (N>2) dest[2] = params[2];			\
-      if (N>3) dest[3] = params[3];			\
+      if (N>0) dest[0] = (params)[0];			\
+      if (N>1) dest[1] = (params)[1];			\
+      if (N>2) dest[2] = (params)[2];			\
+      if (N>3) dest[3] = (params)[3];			\
       ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;	\
    }							\
 } while (0)
 
 
-#define MAT( ATTR, N, face, params )			\
-do {							\
-   if (face != GL_BACK)					\
-      MAT_ATTR( ATTR, N, params ); /* front */		\
-   if (face != GL_FRONT)				\
-      MAT_ATTR( ATTR + 1, N, params ); /* back */	\
+#define MAT( ATTR, N, face, params )				\
+do {								\
+   if (face != GL_BACK)						\
+      OTHER_ATTR( ATTR, N, params ); /* front */	\
+   if (face != GL_FRONT)					\
+      OTHER_ATTR( ATTR + 1, N, params ); /* back */	\
 } while (0)
 
 
-/* NOTE: Have to remove/deal-with colormaterial crossovers, probably
- * later on - in the meantime just store everything.  
+/* Colormaterial is dealt with later on.
  */
 static void GLAPIENTRY _tnl_Materialfv( GLenum face, GLenum pname, 
 			       const GLfloat *params )
@@ -871,44 +569,43 @@ static void GLAPIENTRY _tnl_Materialfv( GLenum face, GLenum pname,
       _mesa_error( ctx, GL_INVALID_ENUM, "glMaterialfv" );
       return;
    }
-}
-
 
-#define IDX_ATTR( A, IDX )				\
-do {							\
-   GET_CURRENT_CONTEXT( ctx );				\
-   TNLcontext *tnl = TNL_CONTEXT(ctx);			\
-							\
-   if (tnl->vtx.attrsz[A] != 1) {			\
-      _tnl_fixup_vertex( ctx, A, 1 );			\
-   }							\
-							\
-   {							\
-      GLfloat *dest = tnl->vtx.attrptr[A];		\
-      dest[0] = IDX;				\
-      ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;	\
-   }							\
-} while (0)
+   tnl->vtx.have_materials = GL_TRUE;
+}
 
 
 static void GLAPIENTRY _tnl_EdgeFlag( GLboolean b )
 {
-   IDX_ATTR( _TNL_ATTRIB_EDGEFLAG, (GLfloat)b );
+   GET_CURRENT_CONTEXT( ctx ); 
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLfloat f = (GLfloat)b;
+
+   OTHER_ATTR( _TNL_ATTRIB_EDGEFLAG, 1, &f );
 }
 
 static void GLAPIENTRY _tnl_EdgeFlagv( const GLboolean *v )
 {
-   IDX_ATTR( _TNL_ATTRIB_EDGEFLAG, (GLfloat)v[0] );
+   GET_CURRENT_CONTEXT( ctx ); 
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLfloat f = (GLfloat)v[0];
+
+   OTHER_ATTR( _TNL_ATTRIB_EDGEFLAG, 1, &f );
 }
 
 static void GLAPIENTRY _tnl_Indexf( GLfloat f )
 {
-   IDX_ATTR( _TNL_ATTRIB_INDEX, f );
+   GET_CURRENT_CONTEXT( ctx ); 
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   OTHER_ATTR( _TNL_ATTRIB_INDEX, 1, &f );
 }
 
 static void GLAPIENTRY _tnl_Indexfv( const GLfloat *v )
 {
-   IDX_ATTR( _TNL_ATTRIB_INDEX, v[0] );
+   GET_CURRENT_CONTEXT( ctx ); 
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   OTHER_ATTR( _TNL_ATTRIB_INDEX, 1, v );
 }
 
 /* Eval
@@ -1029,7 +726,8 @@ static void GLAPIENTRY _tnl_Begin( GLenum mode )
 
       if (ctx->NewState) {
 	 _mesa_update_state( ctx );
-	 if (!(tnl->Driver.NotifyBegin && tnl->Driver.NotifyBegin( ctx, mode )))
+	 if (!(tnl->Driver.NotifyBegin && 
+	       tnl->Driver.NotifyBegin( ctx, mode )))
 	     ctx->Exec->Begin(mode);
 	 return;
       }
@@ -1086,14 +784,11 @@ static void GLAPIENTRY _tnl_End( void )
 static void _tnl_exec_vtxfmt_init( GLcontext *ctx )
 {
    GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
+
    vfmt->ArrayElement = _ae_loopback_array_elt;	        /* generic helper */
    vfmt->Begin = _tnl_Begin;
    vfmt->CallList = _mesa_CallList;
    vfmt->CallLists = _mesa_CallLists;
-   vfmt->Color3f = _tnl_Color3f;
-   vfmt->Color3fv = _tnl_Color3fv;
-   vfmt->Color4f = _tnl_Color4f;
-   vfmt->Color4fv = _tnl_Color4fv;
    vfmt->EdgeFlag = _tnl_EdgeFlag;
    vfmt->EdgeFlagv = _tnl_EdgeFlagv;
    vfmt->End = _tnl_End;
@@ -1103,45 +798,9 @@ static void _tnl_exec_vtxfmt_init( GLcontext *ctx )
    vfmt->EvalCoord2fv = _tnl_EvalCoord2fv;
    vfmt->EvalPoint1 = _tnl_EvalPoint1;
    vfmt->EvalPoint2 = _tnl_EvalPoint2;
-   vfmt->FogCoordfEXT = _tnl_FogCoordfEXT;
-   vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;
    vfmt->Indexf = _tnl_Indexf;
    vfmt->Indexfv = _tnl_Indexfv;
    vfmt->Materialfv = _tnl_Materialfv;
-   vfmt->MultiTexCoord1fARB = _tnl_MultiTexCoord1f;
-   vfmt->MultiTexCoord1fvARB = _tnl_MultiTexCoord1fv;
-   vfmt->MultiTexCoord2fARB = _tnl_MultiTexCoord2f;
-   vfmt->MultiTexCoord2fvARB = _tnl_MultiTexCoord2fv;
-   vfmt->MultiTexCoord3fARB = _tnl_MultiTexCoord3f;
-   vfmt->MultiTexCoord3fvARB = _tnl_MultiTexCoord3fv;
-   vfmt->MultiTexCoord4fARB = _tnl_MultiTexCoord4f;
-   vfmt->MultiTexCoord4fvARB = _tnl_MultiTexCoord4fv;
-   vfmt->Normal3f = _tnl_Normal3f;
-   vfmt->Normal3fv = _tnl_Normal3fv;
-   vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT;
-   vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT;
-   vfmt->TexCoord1f = _tnl_TexCoord1f;
-   vfmt->TexCoord1fv = _tnl_TexCoord1fv;
-   vfmt->TexCoord2f = _tnl_TexCoord2f;
-   vfmt->TexCoord2fv = _tnl_TexCoord2fv;
-   vfmt->TexCoord3f = _tnl_TexCoord3f;
-   vfmt->TexCoord3fv = _tnl_TexCoord3fv;
-   vfmt->TexCoord4f = _tnl_TexCoord4f;
-   vfmt->TexCoord4fv = _tnl_TexCoord4fv;
-   vfmt->Vertex2f = _tnl_Vertex2f;
-   vfmt->Vertex2fv = _tnl_Vertex2fv;
-   vfmt->Vertex3f = _tnl_Vertex3f;
-   vfmt->Vertex3fv = _tnl_Vertex3fv;
-   vfmt->Vertex4f = _tnl_Vertex4f;
-   vfmt->Vertex4fv = _tnl_Vertex4fv;
-   vfmt->VertexAttrib1fNV = _tnl_VertexAttrib1fNV;
-   vfmt->VertexAttrib1fvNV = _tnl_VertexAttrib1fvNV;
-   vfmt->VertexAttrib2fNV = _tnl_VertexAttrib2fNV;
-   vfmt->VertexAttrib2fvNV = _tnl_VertexAttrib2fvNV;
-   vfmt->VertexAttrib3fNV = _tnl_VertexAttrib3fNV;
-   vfmt->VertexAttrib3fvNV = _tnl_VertexAttrib3fvNV;
-   vfmt->VertexAttrib4fNV = _tnl_VertexAttrib4fNV;
-   vfmt->VertexAttrib4fvNV = _tnl_VertexAttrib4fvNV;
 
    vfmt->Rectf = _mesa_noop_Rectf;
    vfmt->EvalMesh1 = _mesa_noop_EvalMesh1;
@@ -1161,13 +820,9 @@ void _tnl_FlushVertices( GLcontext *ctx, GLuint flags )
       _tnl_flush_vtx( ctx );
    }
 
-   {
+   if (tnl->vtx.vertex_size) {
       _tnl_copy_to_current( ctx );
-
-      /* reset attrfv table
-       */
-      init_attrfv( tnl );
-      flags |= FLUSH_UPDATE_CURRENT;
+      reset_attrfv( tnl );
    }
 
    ctx->Driver.NeedFlush = 0;
@@ -1191,26 +846,92 @@ static void _tnl_current_init( GLcontext *ctx )
    tnl->vtx.current[_TNL_ATTRIB_INDEX] = &ctx->Current.Index;
 }
 
+static struct dynfn *no_codegen( GLcontext *ctx, int key )
+{
+   return NULL;   /* default hook: generate nothing, caller goes generic */
+}
 
 void _tnl_vtx_init( GLcontext *ctx )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx); 
    struct tnl_vertex_arrays *tmp = &tnl->vtx_inputs;
    GLuint i;
+   static int firsttime = 1;
+   
+   if (firsttime) {
+      firsttime = 0;
+
+      INIT_CHOOSERS( 0 );
+      INIT_CHOOSERS( 1 );
+      INIT_CHOOSERS( 2 );
+      INIT_CHOOSERS( 3 );
+      INIT_CHOOSERS( 4 );
+      INIT_CHOOSERS( 5 );
+      INIT_CHOOSERS( 6 );
+      INIT_CHOOSERS( 7 );
+      INIT_CHOOSERS( 8 );
+      INIT_CHOOSERS( 9 );
+      INIT_CHOOSERS( 10 );
+      INIT_CHOOSERS( 11 );
+      INIT_CHOOSERS( 12 );
+      INIT_CHOOSERS( 13 );
+      INIT_CHOOSERS( 14 );
+      INIT_CHOOSERS( 15 );
+
+      choose[ERROR_ATTRIB][0] = error_attrib;
+      choose[ERROR_ATTRIB][1] = error_attrib;
+      choose[ERROR_ATTRIB][2] = error_attrib;
+      choose[ERROR_ATTRIB][3] = error_attrib;
+
+      _tnl_generic_attr_table_init( generic_attr_func );
+   }
 
    for (i = 0; i < _TNL_ATTRIB_INDEX; i++)
       _mesa_vector4f_init( &tmp->Attribs[i], 0, 0);
 
+   for (i = 0; i < 4; i++) {
+      make_empty_list( &tnl->vtx.cache.Vertex[i] );
+      make_empty_list( &tnl->vtx.cache.Attribute[i] );
+      tnl->vtx.gen.Vertex[i] = no_codegen;
+      tnl->vtx.gen.Attribute[i] = no_codegen;
+   }
+
+   _tnl_InitX86Codegen( &tnl->vtx.gen );
+
    _tnl_current_init( ctx );
    _tnl_exec_vtxfmt_init( ctx );
+   _tnl_generic_exec_vtxfmt_init( ctx );
 
    _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt );
-   tnl->vtx.vertex_size = 1; init_attrfv( tnl );
+
+   memcpy( tnl->vtx.tabfv, choose, sizeof(choose) );
+
+   for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) 
+      tnl->vtx.attrsz[i] = 0;
+
+   tnl->vtx.vertex_size = 0;
+   tnl->vtx.have_materials = 0;
 }
 
+static void free_funcs( struct dynfn *l )
+{
+   struct dynfn *f, *tmp;
+   foreach_s (f, tmp, l) {
+      remove_from_list( f );
+      ALIGN_FREE( f->code );
+      FREE( f );
+   }
+}
 
 
 void _tnl_vtx_destroy( GLcontext *ctx )
 {
+   TNLcontext *tnl = TNL_CONTEXT(ctx); 
+   GLuint i;
+   /* Free the generated-function caches for each of the four sizes. */
+   for (i = 0; i < 4; i++) {
+      free_funcs( &tnl->vtx.cache.Vertex[i] );
+      free_funcs( &tnl->vtx.cache.Attribute[i] ); 
+   }
 }
 
diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h
index 2500320213..5e4ab71a08 100644
--- a/src/mesa/tnl/t_vtx_api.h
+++ b/src/mesa/tnl/t_vtx_api.h
@@ -37,12 +37,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "t_context.h"
 
+#define ERROR_ATTRIB _TNL_MAX_ATTR_CODEGEN /* last row of tabfv[]/choose[] */
+
+
+
+/* t_vtx_api.c:
+ */
 extern void _tnl_vtx_init( GLcontext *ctx );
 extern void _tnl_vtx_destroy( GLcontext *ctx );
 
 extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags );
 extern void _tnl_flush_vtx( GLcontext *ctx );
 
+extern void _tnl_wrap_filled_vertex( GLcontext *ctx );
+
+/* t_vtx_exec.c:
+ */
+
 extern void _tnl_do_EvalCoord2f( GLcontext* ctx, GLfloat u, GLfloat v );
 extern void _tnl_do_EvalCoord1f(GLcontext* ctx, GLfloat u);
 extern void _tnl_update_eval( GLcontext *ctx );
@@ -55,4 +66,19 @@ extern GLboolean *_tnl_translate_edgeflag( GLcontext *ctx,
 extern GLboolean *_tnl_import_current_edgeflag( GLcontext *ctx,
 						GLuint count );
 
+
+
+/* t_vtx_generic.c:
+ */
+extern void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx );
+
+extern void _tnl_generic_attr_table_init( attrfv_func (*tab)[4] );
+
+/* t_vtx_x86.c:
+ */
+extern void _tnl_InitX86Codegen( struct dynfn_generators *gen );
+
+
+
+
 #endif
diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S
index 3a78838b67..dcaca47160 100644
--- a/src/mesa/tnl/t_vtx_x86_gcc.S
+++ b/src/mesa/tnl/t_vtx_x86_gcc.S
@@ -25,9 +25,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-	
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
 
-	
 			
 #define GLOBL( x )	\
 .globl x;		\
@@ -36,113 +38,127 @@ x:
 .data
 .align 4
 
+// Someone who knew a lot about this sort of thing would use this
+// macro to note current offsets, etc in a special region of the
+// object file & just make everything work out neat.  I don't know
+// enough to do that...
 	
+#define SUBST( x ) (0x10101010 + x)	
 	
+
+
 GLOBL ( _x86_Vertex1fv )
-	;; v already in eax
+	movl    4(%esp), %ecx	
 	push	%edi
 	push	%esi
-	movl	(0x0), %edi	; load vbptr
-	movl	(%eax), %edx	; load v[0]
-	movl	%edx, (%edi)	; vbptr[0] = v[0]
-	addl	$4, %edi	; vbptr += 1
-	movl	$0x0, %ecx	; vertex_size - 1
-	movl	$0x0, %esi	; tnl->vtx.vertex + 1
+	movl	SUBST(0), %edi	// 0x0 --> tnl->vtx.vbptr
+	movl	(%ecx), %edx	// load v[0]
+	movl	%edx, (%edi)	// tnl->vtx.vbptr[0] = v[0]
+	addl	$4, %edi	// tnl->vtx.vbptr += 1
+	movl	$SUBST(1), %ecx	// 0x1 --> (tnl->vtx.vertex_size - 1)
+	movl	$SUBST(2), %esi	// 0x2 --> (tnl->vtx.vertex + 1)
 	repz
-	movsl %ds:(%esi), %es:(%edi)
-	movl	%edi, (0)	; save vbptr
-	movl	(0), %edx	; load counter
+	movsl   %ds:(%esi), %es:(%edi)
+	movl	%edi, SUBST(0)	// 0x0 --> tnl->vtx.vbptr
+	movl	SUBST(3), %edx	// 0x3 --> counter
 	pop	%esi
 	pop	%edi
-	dec	%edx		; counter--
-	movl	%edx, (0)	; save counter
-	je	.5		; if (counter != 0)
-	ret			;    return
-.5:	jmp    *0		; else notify();
+	dec	%edx		// counter--
+	movl	%edx, SUBST(3)	// 0x3 --> counter 
+	je	.5		// if (counter != 0)
+	ret			//    return
+.5:	mov     $SUBST(4), %eax	// else notify()
+        jmp     *%eax           // jmp $0x10101014 doesn't seem to work
 GLOBL ( _x86_Vertex1fv_end )
 
 
+.align 4
 GLOBL ( _x86_Vertex2fv )
-	;; v already in eax
+	movl    4(%esp), %ecx	
 	push	%edi
 	push	%esi
-	movl	(0x0), %edi	; load vbptr
-	movl	(%eax), %edx	; load v[0]
-	movl	4(%eax), %ecx	; load v[1]
-	movl	%edx, (%edi)	; vbptr[0] = v[0]
-	movl	%ecx, 4(%edi)	; vbptr[1] = v[1]
-	addl	$8, %edi	; vbptr += 2
-	movl	$0x0, %ecx	; vertex_size - 2
-	movl	$0x0, %esi	; tnl->vtx.vertex + 2
+	movl	SUBST(0), %edi	// load tnl->vtx.vbptr
+	movl	(%ecx), %edx	// load v[0]
+	movl	4(%ecx), %eax	// load v[1]
+	movl	%edx, (%edi)	// tnl->vtx.vbptr[0] = v[0]
+	movl	%eax, 4(%edi)	// tnl->vtx.vbptr[1] = v[1]
+	addl	$8, %edi	// tnl->vtx.vbptr += 2
+	movl	$SUBST(1), %ecx	// vertex_size - 2
+	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 2
 	repz
 	movsl %ds:(%esi), %es:(%edi)
-	movl	%edi, (0)	; save vbptr
-	movl	(0), %edx	; load counter
+	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr
+	movl	SUBST(3), %edx	// load counter
 	pop	%esi
 	pop	%edi
-	dec	%edx		; counter--
-	movl	%edx, (0)	; save counter
-	je	.6		; if (counter != 0)
-	ret			;    return
-.6:	jmp    *0		; else notify();
-GLOBL ( _x86_Vertex3fv_end )
+	dec	%edx		// counter--
+	movl	%edx, SUBST(3)	// save counter
+	je	.6		// if (counter != 0)
+	ret			//    return
+.6:	mov     $SUBST(4), %eax	// else notify()
+        jmp     *%eax           // jmp $0x10101014 doesn't seem to work
+GLOBL ( _x86_Vertex2fv_end )
 
+.align 4
 GLOBL ( _x86_Vertex3fv )
-	;; v already in eax
+	movl    4(%esp), %ecx	
 	push	%edi
 	push	%esi
-	movl	(0x0), %edi	; load vbptr
-	movl	(%eax), %edx	; load v[0]
-	movl	4(%eax), %ecx	; load v[1]
-	movl	8(%eax), %esi	; load v[2]
-	movl	%edx, (%edi)	; vbptr[0] = v[0]
-	movl	%ecx, 4(%edi)	; vbptr[1] = v[1]
-	movl	%esi, 8(%edi)	; vbptr[2] = v[2]
-	addl	$12, %edi	; vbptr += 3
-	movl	$0x0, %ecx	; vertex_size - 3
-	movl	$0x0, %esi	; tnl->vtx.vertex + 3
+	movl	SUBST(0), %edi	// load tnl->vtx.vbptr
+	movl	(%ecx), %edx	// load v[0]
+	movl	4(%ecx), %eax	// load v[1]
+	movl	8(%ecx), %esi	// load v[2]
+	movl	%edx, (%edi)	// tnl->vtx.vbptr[0] = v[0]
+	movl	%eax, 4(%edi)	// tnl->vtx.vbptr[1] = v[1]
+	movl	%esi, 8(%edi)	// tnl->vtx.vbptr[2] = v[2]
+	addl	$12, %edi	// tnl->vtx.vbptr += 3
+	movl	$SUBST(1), %ecx	// vertex_size - 3
+	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 3
 	repz
 	movsl %ds:(%esi), %es:(%edi)
-	movl	%edi, (0)	; save vbptr
-	movl	(0), %edx	; load counter
+	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr
+	movl	SUBST(3), %edx	// load counter
 	pop	%esi
 	pop	%edi
-	dec	%edx		; counter--
-	movl	%edx, (0)	; save counter
-	je	.7		; if (counter != 0)
-	ret			;    return
-.7:	jmp    *0		; else notify();
+	dec	%edx		// counter--
+	movl	%edx, SUBST(3)	// save counter
+	je	.7		// if (counter != 0)
+	ret			//    return
+.7:	mov     $SUBST(4), %eax	// else notify()
+        jmp     *%eax           // jmp $0x10101014 doesn't seem to work
 GLOBL ( _x86_Vertex3fv_end )
 
 			
+.align 4
 GLOBL ( _x86_Vertex4fv )
-	;; v already in eax
+	movl    4(%esp), %ecx	
 	push	%edi
 	push	%esi
-	movl	(0x0), %edi	; load vbptr
-	movl	(%eax), %edx	; load v[0]
-	movl	4(%eax), %ecx	; load v[1]
-	movl	8(%eax), %esi	; load v[2]
-	movl	%edx, (%edi)	; vbptr[0] = v[0]
-	movl	%ecx, 4(%edi)	; vbptr[1] = v[1]
-	movl	%esi, 8(%edi)	; vbptr[2] = v[2]
-	movl	12(%eax), %esi	; load v[3]
-	movl	%esi, 12(%edi)	; vbptr[3] = v[3]
-	addl	$16, %edi	; vbptr += 4
-	movl	$0x0, %ecx	; vertex_size - 4
-	movl	$0x0, %esi	; tnl->vtx.vertex + 3
+	movl	SUBST(0), %edi	// load tnl->vtx.vbptr
+	movl	(%ecx), %edx	// load v[0]
+	movl	4(%ecx), %eax	// load v[1]
+	movl	8(%ecx), %esi	// load v[2]
+	movl	12(%ecx), %ecx	// load v[3]
+	movl	%edx, (%edi)	// tnl->vtx.vbptr[0] = v[0]
+	movl	%eax, 4(%edi)	// tnl->vtx.vbptr[1] = v[1]
+	movl	%esi, 8(%edi)	// tnl->vtx.vbptr[2] = v[2]
+	movl	%ecx, 12(%edi)	// tnl->vtx.vbptr[3] = v[3]
+	addl	$16, %edi	// tnl->vtx.vbptr += 4
+	movl	$SUBST(1), %ecx	// vertex_size - 4
+	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 4
 	repz
-	movsl %ds:(%esi), %es:(%edi)
-	movl	%edi, (0)	; save vbptr
-	movl	(0), %edx	; load counter
+	movsl   %ds:(%esi), %es:(%edi)
+	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr
+	movl	SUBST(3), %edx	// load counter
 	pop	%esi
 	pop	%edi
-	dec	%edx		; counter--
-	movl	%edx, (0)	; save counter
-	je	.6		; if (counter != 0)
-	ret			;    return
-.6:	jmp    *0		; else notify();
-GLOBL ( _x86_Vertex3fv_end )
+	dec	%edx		// counter--
+	movl	%edx, SUBST(3)	// save counter
+	je	.8		// if (counter != 0)
+	ret			//    return
+.8:	mov     $SUBST(4), %eax	// else notify()
+        jmp     *%eax           // jmp $0x10101014 doesn't seem to work
+GLOBL ( _x86_Vertex4fv_end )
 
 
 	
@@ -151,92 +167,151 @@ GLOBL ( _x86_Vertex3fv_end )
  */
 
 GLOBL( _x86_Attribute1fv)
-	/* 'v' is already in eax */
-	movl (%eax), %ecx       /* load v[0] */
-	movl %ecx, 0      	/* store v[0] to current vertex */
+	movl 4(%esp), %ecx	
+	movl (%ecx), %eax       /* load v[0] */
+	movl %eax, SUBST(0)    	/* store v[0] to current vertex */
 	ret
-GLOBL ( _x86_Attribute2fv_end )
+GLOBL ( _x86_Attribute1fv_end )
 
 GLOBL( _x86_Attribute2fv)
-	/* 'v' is already in eax */
-	movl (%eax), %ecx       /* load v[0] */
-	movl 4(%eax), %eax      /* load v[1] */
-	movl %ecx, 0      	/* store v[0] to current vertex */
-	movl %eax, 4      	/* store v[1] to current vertex */
+	movl 4(%esp), %ecx	
+	movl (%ecx), %eax       /* load v[0] */
+	movl 4(%ecx), %edx      /* load v[1] */
+	movl %eax, SUBST(0)    	/* store v[0] to current vertex */
+	movl %edx, SUBST(1)    	/* store v[1] to current vertex */
 	ret
 GLOBL ( _x86_Attribute2fv_end )
 
 
 GLOBL( _x86_Attribute3fv)
-	/* 'v' is already in eax */
-	movl (%eax), %ecx       /* load v[0] */
-	movl 4(%eax), %edx      /* load v[1] */
-	movl 8(%eax), %eax      /* load v[2] */
-	movl %ecx, 0      	/* store v[0] to current vertex */
-	movl %edx, 4      	/* store v[1] to current vertex */
-	movl %eax, 8      	/* store v[2] to current vertex */
+	movl 4(%esp), %ecx	
+	movl (%ecx), %eax       /* load v[0] */
+	movl 4(%ecx), %edx      /* load v[1] */
+	movl 8(%ecx), %ecx      /* load v[2] */
+	movl %eax, SUBST(0)    	/* store v[0] to current vertex */
+	movl %edx, SUBST(1)    	/* store v[1] to current vertex */
+	movl %ecx, SUBST(2)   	/* store v[2] to current vertex */
 	ret
 GLOBL ( _x86_Attribute3fv_end )
 
 GLOBL( _x86_Attribute4fv)
-	/* 'v' is already in eax */
-	movl (%eax), %ecx       /* load v[0] */
-	movl 4(%eax), %edx      /* load v[1] */
-	movl %ecx, 0      	/* store v[0] to current vertex */
-	movl %edx, 4      	/* store v[1] to current vertex */
-	movl 8(%eax), %ecx      /* load v[2] */
-	movl 12(%eax), %edx      /* load v[3] */
-	movl %ecx, 8      	/* store v[2] to current vertex */
-	movl %edx, 12      	/* store v[3] to current vertex */
+	movl 4(%esp), %ecx	
+	movl (%ecx), %eax       /* load v[0] */
+	movl 4(%ecx), %edx      /* load v[1] */
+	movl %eax, SUBST(0)    	/* store v[0] to current vertex */
+	movl %edx, SUBST(1)    	/* store v[1] to current vertex */
+	movl 8(%ecx), %eax      /* load v[2] */
+	movl 12(%ecx), %edx     /* load v[3] */
+	movl %eax, SUBST(2)    	/* store v[2] to current vertex */
+	movl %edx, SUBST(3)    	/* store v[3] to current vertex */
 	ret
-GLOBL ( _x86_Attribute3fv_end )
+GLOBL ( _x86_Attribute4fv_end )
+
+
+// Choosers:
+
+// Must generate all of these ahead of first usage.  Generate at
+// compile-time?  
+	
+// NOT CURRENTLY USED
 
 
-;;; In the 1st level dispatch functions, switch to a different
-;;; calling convention -- (const GLfloat *v) in %eax.
-;;; 
-;;; As with regular (x86) dispatch, don't create a new stack frame -
-;;; just let the 'ret' in the dispatched function return straight
-;;; back to the original caller.
+GLOBL( _x86_choose_fv)
+	subl	$12, %esp	// gcc does 16 byte alignment of stack frames?
+	movl	$SUBST(0), (%esp)	// arg 0 - attrib
+	movl	$SUBST(1), 4(%esp)	// arg 1 - N
+	call    _do_choose	// new function returned in %eax
+	add     $12, %esp	// tear down stack frame
+	jmp     *%eax		// jump to new func
+GLOBL ( _x86_choosefv_end )
+	
+	
 
 
+// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
 	
-;;; Vertex/Normal/Color, etc: the address of the function pointer
-;;; is known at codegen time.
+
+// NOT CURRENTLY USED
+
 	
+		
+// In the 1st level dispatch functions, switch to the fv calling
+// convention -- (const GLfloat *v) in the first stack argument slot.
+// 
+// As with regular (x86) dispatch, don't create a new stack frame -
+// just let the 'ret' in the dispatched function return straight
+// back to the original caller.
+
+
+	
+// Vertex/Normal/Color, etc: the address of the function pointer
+// is known at codegen time.
+
+
+// Unfortunately, have to play with the stack in the non-fv case:
+// 
 GLOBL( _x86_dispatch_attrf )
-	leal	4(%esp), %eax
-	jmp	*foo
+	subl	$12, %esp	// gcc does 16 byte alignment of stack frames?
+	leal	16(%esp), %edx	// address of first float on stack
+	movl	%edx, (%esp)	// save as 'v'
+	call	SUBST(0)	// 0x0 --> tabfv[attr][n]
+	addl	$12, %esp	// tear down frame
+	ret			// return
 GLOBL( _x86_dispatch_attrf_end )
 
+// The fv case is simpler:
+// 
 GLOBL( _x86_dispatch_attrfv )
-	movl	4(%esp), %eax
-	jmp	*foo
-GLOBL( _x86_dispatch_attr1f_end )
+	jmp	SUBST(0)	// 0x0 --> tabfv[attr][n]
+GLOBL( _x86_dispatch_attrfv_end )
 
-;;; MultiTexcoord: the address of the function pointer must be
-;;; calculated.
-	
+
+// MultiTexcoord: the address of the function pointer must be
+// calculated, but can use the index argument slot to hold 'v', and
+// avoid setting up a new stack frame.
+
+// Also, will only need a maximum of four of each of these per context:
+// 
 GLOBL( _x86_dispatch_multitexcoordf )
-	leal	4(%esp), %eax
-	jmp	*foo
+	movl	4(%esp), %ecx
+	leal	8(%esp), %edx
+	andl	$7, %ecx
+	movl	%edx, 4(%esp)
+	sall	$4, %ecx
+	jmp	*SUBST(0)(%ecx)	// 0x0 - tabfv[tex0][n]
 GLOBL( _x86_dispatch_multitexcoordf_end )
 
 GLOBL( _x86_dispatch_multitexcoordfv )
-	movl	4(%esp), %eax
-	jmp	*foo
+	movl	4(%esp), %ecx
+	movl	8(%esp), %edx
+	andl	$7, %ecx
+	movl	%edx, 4(%esp)
+	sall	$4, %ecx
+	jmp	*SUBST(0)(%ecx)	// 0x0 - tabfv[tex0][n]
 GLOBL( _x86_dispatch_multitexcoordfv_end )
 				
-;;; VertexAttrib: the address of the function pointer must be
-;;; calculated.
+// VertexAttrib: the address of the function pointer must be
+// calculated.
 
 GLOBL( _x86_dispatch_vertexattribf )
-	leal	4(%esp), %eax
-	jmp	*foo
+	movl	$16, %ecx	// fallback slot for a bad index (presumably ERROR_ATTRIB)
+	movl	4(%esp), %eax	// load attribute index
+	cmpl	$16, %eax
+	cmovae	%ecx, %eax	// unsigned clamp: index >= 16 --> 16
+	leal	8(%esp), %ecx	// calculate 'v'
+	movl	%ecx, 4(%esp)	// save in 1st arg slot
+	sall	$4, %eax	// index *= 16 to form the table byte offset
+	jmp	*SUBST(0)(%eax)	// 0x0 - tabfv[0][n]
 GLOBL( _x86_dispatch_vertexattribf_end )
 
 GLOBL( _x86_dispatch_vertexattribfv )
+	movl	$16, %ecx	// fallback slot for a bad index (presumably ERROR_ATTRIB)
 	movl	4(%esp), %eax
-	jmp	*foo
+	cmpl	$16, %eax
+	cmovae	%ecx, %eax	// unsigned clamp: index >= 16 --> 16
+	movl	8(%esp), %ecx	// load 'v'
+	movl	%ecx, 4(%esp)	// save in 1st arg slot
+	sall	$4, %eax	// index *= 16 to form the table byte offset
+	jmp	*SUBST(0)(%eax)	// 0x0 - tabfv[0][n]
 GLOBL( _x86_dispatch_vertexattribfv_end )
-	
\ No newline at end of file
+
-- 
cgit v1.2.3