diff options
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/tnl/t_vtx_api.c | 675 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_api.h | 234 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_exec.c | 636 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_sse.c | 93 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_x86.c | 727 | 
5 files changed, 2365 insertions, 0 deletions
| diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c new file mode 100644 index 0000000000..8ae0569584 --- /dev/null +++ b/src/mesa/tnl/t_vtx_api.c @@ -0,0 +1,675 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + *   Keith Whitwell <keith@tungstengraphics.com> + */ +#include "mtypes.h" +#include "colormac.h" +#include "simple_list.h" +#include "vtxfmt.h" + +#include "tnl_vtx_api.h" + +/* Fallback versions of all the entrypoints for situations where + * codegen isn't available.  This is slowed significantly by all the + * gumph necessary to get to the tnl pointer. 
+ */
+
+
+/* MultiTexcoord ends up with both of these branches, unfortunately
+ * (it may get its own version of the macro after size-tracking is working).
+ *
+ * ATTRF( ATTR, N, A, B, C, D ) expands to a complete function body that
+ * stores an N-component float attribute:
+ *
+ *   - If the low four bits of ATTR are zero, A..D are written to the
+ *     first N slots at tnl->dmaptr, the remaining slots up to
+ *     tnl->vertex_size are filled from tnl->vertex[], dmaptr is advanced
+ *     by one vertex and tnl->notify() is called when tnl->counter
+ *     reaches zero.
+ *   - Otherwise the first N components are stored through
+ *     tnl->attrptr[ATTR & 0xf].
+ *
+ * Only the first N of A..D are ever stored; the ATTR1F/2F/3F wrappers
+ * pass placeholder constants for the unused trailing arguments.
+ */
+#define ATTRF( ATTR, N, A, B, C, D )			\
+{							\
+   GET_CURRENT_CONTEXT( ctx );				\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);			\
+							\
+   if (((ATTR) & 0xf) == 0) {				\
+      int i;						\
+							\
+      if ((N)>0) tnl->dmaptr[0].f = (A);		\
+      if ((N)>1) tnl->dmaptr[1].f = (B);		\
+      if ((N)>2) tnl->dmaptr[2].f = (C);		\
+      if ((N)>3) tnl->dmaptr[3].f = (D);		\
+							\
+      /* Copy the rest of the vertex as raw ints. */	\
+      for (i = (N); i < tnl->vertex_size; i++)		\
+	 tnl->dmaptr[i].i = tnl->vertex[i].i;		\
+							\
+      tnl->dmaptr += tnl->vertex_size;			\
+							\
+      if (--tnl->counter == 0)				\
+	 tnl->notify();					\
+   }							\
+   else {						\
+      GLfloat *dest = tnl->attrptr[(ATTR) & 0xf];	\
+      if ((N)>0) dest[0] = (A);				\
+      if ((N)>1) dest[1] = (B);				\
+      if ((N)>2) dest[2] = (C);				\
+      if ((N)>3) dest[3] = (D);				\
+   }							\
+}
+
+/* Fixed-size wrappers.  Each takes exactly as many components as it
+ * stores, matching how every caller in this file invokes them.
+ */
+#define ATTR4F( ATTR, A, B, C, D )  ATTRF( ATTR, 4, A, B, C, D )
+#define ATTR3F( ATTR, A, B, C )     ATTRF( ATTR, 3, A, B, C, 1 )
+#define ATTR2F( ATTR, A, B )        ATTRF( ATTR, 2, A, B, 0, 1 )
+#define ATTR1F( ATTR, A )           ATTRF( ATTR, 1, A, 0, 0, 1 )
+
+/* Unsigned-byte variants: convert each component with UBYTE_TO_FLOAT
+ * and forward to the float macros.
+ */
+#define ATTR3UB( ATTR, A, B, C )		\
+   ATTR3F( ATTR,				\
+	   UBYTE_TO_FLOAT(A),			\
+	   UBYTE_TO_FLOAT(B),			\
+	   UBYTE_TO_FLOAT(C))
+
+
+#define ATTR4UB( ATTR, A, B, C, D )		\
+   ATTR4F( ATTR,				\
+	   UBYTE_TO_FLOAT(A),			\
+	   UBYTE_TO_FLOAT(B),			\
+	   UBYTE_TO_FLOAT(C),			\
+	   UBYTE_TO_FLOAT(D))
+
+
+/* Vertex
+ */
+static void tnl_Vertex2f( GLfloat x, GLfloat y )
+{
+   ATTR2F( VERT_ATTRIB_POS, x, y );
+}
+
+static void tnl_Vertex2fv( const GLfloat *v )
+{
+   ATTR2F( VERT_ATTRIB_POS, v[0], v[1] );
+}
+
+static void tnl_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{
+   ATTR3F( VERT_ATTRIB_POS, x, y, z );
+}
+
+static void tnl_Vertex3fv( const GLfloat *v )
+{
+   ATTR3F( VERT_ATTRIB_POS, v[0], 
v[1], v[2] );  +} + +static void tnl_Vertex4f( GLfloat x, GLfloat y, GLfloat z, GLfloat w ) +{ +   ATTR4F( VERT_ATTRIB_POS, x, y, z, w );  +} + +static void tnl_Vertex4fv( const GLfloat *v ) +{ +   ATTR4F( VERT_ATTRIB_POS, v[0], v[1], v[2], v[3] );  +} + + +/* Color + */ +static void tnl_Color3ub( GLubyte r, GLubyte g, GLubyte b ) +{ +   ATTR3UB( VERT_ATTRIB_COLOR0, r, g, b ); +} + +static void tnl_Color3ubv( const GLubyte *v ) +{ +   ATTR3UB( VERT_ATTRIB_COLOR0, v[0], v[1], v[2] ); +} + +static void tnl_Color4ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a ) +{ +   ATTR4UB( VERT_ATTRIB_COLOR0, r, g, b, a ); +} + +static void tnl_Color4ubv( const GLubyte *v ) +{ +   ATTR4UB( VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3] ); +} + +static void tnl_Color3f( GLfloat r, GLfloat g, GLfloat b ) +{ +   ATTR3F( VERT_ATTRIB_COLOR0, r, g, b ); +} + +static void tnl_Color3fv( const GLfloat *v ) +{ +   ATTR3F( VERT_ATTRIB_COLOR0, v[0], v[1], v[2] ); +} + +static void tnl_Color4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a ) +{ +   ATTR4F( VERT_ATTRIB_COLOR0, r, g, b, a ); +} + +static void tnl_Color4fv( const GLfloat *v ) +{ +   ATTR4F( VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3] ); +} + + +/* Secondary Color + */ +static void tnl_SecondaryColor3ubEXT( GLubyte r, GLubyte g, GLubyte b ) +{ +   ATTR3UB( VERT_ATTRIB_COLOR1, r, g, b ); +} + +static void tnl_SecondaryColor3ubvEXT( const GLubyte *v ) +{ +   ATTR3UB( VERT_ATTRIB_COLOR1, v[0], v[1], v[2] ); +} + +static void tnl_SecondaryColor3fEXT( GLfloat r, GLfloat g, GLfloat b ) +{ +   ATTR3F( VERT_ATTRIB_COLOR1, r, g, b ); +} + +static void tnl_SecondaryColor3fvEXT( const GLfloat *v ) +{ +   ATTR3F( VERT_ATTRIB_COLOR1, v[0], v[1], v[2] ); +} + + + +/* Fog Coord + */ +static void tnl_FogCoordfEXT( GLfloat f ) +{ +   ATTR1F( VERT_ATTRIB_FOG, f ); +} + +static void tnl_FogCoordfvEXT( const GLfloat *v ) +{ +   ATTR1F( VERT_ATTRIB_FOG, v[0] ); +} + + + +/* Normal + */ +static void tnl_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 ) +{ +   
ATTR3F( VERT_ATTRIB_NORMAL, n0, n1, n2 );
+}
+
+/* Vector form of tnl_Normal3f: stores v[0..2] as the current normal.
+ */
+static void tnl_Normal3fv( const GLfloat *v )
+{
+   ATTR3F( VERT_ATTRIB_NORMAL, v[0], v[1], v[2] );
+}
+
+
+/* TexCoord
+ *
+ * All of these store to texture unit 0 (VERT_ATTRIB_TEX0).
+ */
+static void tnl_TexCoord1f( GLfloat s )
+{
+   ATTR1F( VERT_ATTRIB_TEX0, s );
+}
+
+static void tnl_TexCoord1fv( const GLfloat *v )
+{
+   ATTR1F( VERT_ATTRIB_TEX0, v[0] );
+}
+
+static void tnl_TexCoord2f( GLfloat s, GLfloat t )
+{
+   ATTR2F( VERT_ATTRIB_TEX0, s, t );
+}
+
+static void tnl_TexCoord2fv( const GLfloat *v )
+{
+   ATTR2F( VERT_ATTRIB_TEX0, v[0], v[1] );
+}
+
+static void tnl_TexCoord3f( GLfloat s, GLfloat t, GLfloat r )
+{
+   ATTR3F( VERT_ATTRIB_TEX0, s, t, r );
+}
+
+static void tnl_TexCoord3fv( const GLfloat *v )
+{
+   ATTR3F( VERT_ATTRIB_TEX0, v[0], v[1], v[2] );
+}
+
+static void tnl_TexCoord4f( GLfloat s, GLfloat t, GLfloat r, GLfloat q )
+{
+   ATTR4F( VERT_ATTRIB_TEX0, s, t, r, q );
+}
+
+static void tnl_TexCoord4fv( const GLfloat *v )
+{
+   ATTR4F( VERT_ATTRIB_TEX0, v[0], v[1], v[2], v[3] );
+}
+
+
+/* MultiTexcoord
+ *
+ * The attribute slot is computed as an offset from VERT_ATTRIB_TEX0 by
+ * the distance of 'target' from GL_TEXTURE0_ARB.
+ *
+ * NOTE(review): 'target' is not range-checked here.  The ATTRnF macros
+ * only look at the low four bits of the slot and treat slot 0 as "emit
+ * a vertex", so an out-of-range target could alias another attribute --
+ * confirm that callers validate it.
+ */
+static void tnl_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+{
+   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0;
+   ATTR1F( attr, s );
+}
+
+static void tnl_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+{
+   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0;
+   ATTR1F( attr, v[0] );
+}
+
+static void tnl_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+{
+   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0;
+   ATTR2F( attr, s, t );
+}
+
+static void tnl_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+{
+   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0;
+   ATTR2F( attr, v[0], v[1] );
+}
+
+static void tnl_MultiTexCoord3fARB( GLenum target, GLfloat s, GLfloat t,
+				    GLfloat r)
+{
+   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0;
+   ATTR3F( attr, s, t, r );
+}
+
+static void tnl_MultiTexCoord3fvARB( GLenum target, const GLfloat *v )
+{
+   GLint attr 
= (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; +   ATTR3F( attr, v[0], v[1], v[2] ); +} + +static void tnl_MultiTexCoord4fARB( GLenum target, GLfloat s, GLfloat t, +				    GLfloat r, GLfloat q ) +{ +   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; +   ATTR4F( attr, s, t, r, q ); +} + +static void tnl_MultiTexCoord4fvARB( GLenum target, const GLfloat *v ) +{ +   GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; +   ATTR4F( attr, v[0], v[1], v[2], v[3] ); +} + + +/* NV_vertex_program:   + * + * *** Need second dispatch layer above this for size tracking.  One + * *** dispatch layer handles both VertexAttribute and MultiTexCoord  + */ +static void tnl_VertexAttrib1fNV( GLuint index, GLfloat s ) +{ +   ATTR1F( index, s ); +} + +static void tnl_VertexAttrib1fvNV( GLuint index, const GLfloat *v ) +{ +   ATTR1F( index, v[0] ); +} + +static void tnl_VertexAttrib2fNV( GLuint index, GLfloat s, GLfloat t ) +{ +   ATTR2F( index, s, t ); +} + +static void tnl_VertexAttrib2fvNV( GLuint index, const GLfloat *v ) +{ +   ATTR2F( index, v[0], v[1] ); +} + +static void tnl_VertexAttrib3fNV( GLuint index, GLfloat s, GLfloat t,  +				  GLfloat r ) +{ +   ATTR3F( index, s, t, r ); +} + +static void tnl_VertexAttrib3fvNV( GLuint index, const GLfloat *v ) +{ +   ATTR3F( index, v[0], v[1], v[2] ); +} + +static void tnl_VertexAttrib4fNV( GLuint index, GLfloat s, GLfloat t, +				  GLfloat r, GLfloat q ) +{ +   ATTR4F( index, s, t, r, q ); +} + +static void tnl_VertexAttrib4fvNV( GLuint index, const GLfloat *v ) +{ +   ATTR4F( index, v[0], v[1], v[2], v[3] ); +} + + +/* Miscellaneous:  (These don't alias NV attributes, right?) 
+ */
+static void tnl_EdgeFlag( GLboolean flag )
+{
+   GET_TNL;
+   tnl->edgeflagptr[0] = flag;
+}
+
+static void tnl_EdgeFlagv( const GLboolean *flag )
+{
+   GET_TNL;
+   tnl->edgeflagptr[0] = *flag;
+}
+
+static void tnl_Indexi( GLint idx )
+{
+   GET_TNL;
+   tnl->indexptr[0] = idx;
+}
+
+static void tnl_Indexiv( const GLint *idx )
+{
+   GET_TNL;
+   tnl->indexptr[0] = *idx;
+}
+
+
+
+/* Could use dispatch switching to build 'ranges' of eval vertices for
+ * each type, avoiding need for flags.  (Make
+ * evalcoords/evalpoints/vertices/attr0 mutually exclusive)
+ *  --> In which case, may as well use Vertex{12}f{v} here.
+ *
+ * For now each EvalCoord is emitted through the position slot, exactly
+ * like a 1- or 2-component Vertex call.
+ */
+static void _tnl_EvalCoord1f( GLfloat u )
+{
+   ATTR1F( VERT_ATTRIB_POS, u );
+}
+
+static void _tnl_EvalCoord1fv( const GLfloat *v )
+{
+   ATTR1F( VERT_ATTRIB_POS, v[0] );
+}
+
+static void _tnl_EvalCoord2f( GLfloat u, GLfloat v )
+{
+   ATTR2F( VERT_ATTRIB_POS, u, v );
+}
+
+static void _tnl_EvalCoord2fv( const GLfloat *v )
+{
+   ATTR2F( VERT_ATTRIB_POS, v[0], v[1] );
+}
+
+
+/* Materials:
+ *  *** Treat as more vertex attributes
+ *
+ * When the current primitive is anything other than the GL_POLYGON+1
+ * marker, fall back (VFMT_FALLBACK) and re-dispatch through
+ * glMaterialfv(); otherwise hand the call to _mesa_noop_Materialfv().
+ */
+static void _tnl_Materialfv( GLenum face, GLenum pname,
+			       const GLfloat *params )
+{
+   /* Fetch the tnl context the same way the ATTRF macro does; the
+    * primitive test below needs it.
+    */
+   GET_CURRENT_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (tnl->prim[0] != GL_POLYGON+1) {
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );
+      return;
+   }
+   _mesa_noop_Materialfv( face, pname, params );
+}
+
+
+
+
+/* Codegen support
+ *
+ * lookup() scans the list headed by 'l' and returns the first entry
+ * whose key equals 'key', or 0 when there is none.
+ */
+static struct dynfn *lookup( struct dynfn *l, int key )
+{
+   struct dynfn *f;
+
+   foreach( f, l ) {
+      if (f->key == key)
+	 return f;
+   }
+
+   return 0;
+}
+
+/* Can't use the loopback template for this:
+ */
+#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )			\
+static void choose_##FN ARGS1						\
+{									\
+   int key = tnl->vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn = lookup( &tnl->dfn_cache.FN, key );		\
+									\
+   if (dfn == 0)							\
+      dfn = 
tnl->codegen.FN( &vb, key );				\ +   else if (MESA_VERBOSE & DEBUG_CODEGEN)				\ +      fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ );		\ +									\ +   if (dfn)								\ +      tnl->context->Exec->FN = (FNTYPE)(dfn->code);			\ +   else {								\ +      if (MESA_VERBOSE & DEBUG_CODEGEN)					\ +	 fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\ +      tnl->context->Exec->FN = tnl_##FN;				\ +   }									\ +									\ +   tnl->context->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;		\ +   tnl->context->Exec->FN ARGS2;					\ +} + + + +CHOOSE(Normal3f, p3f, 3, VERT_ATTRIB_NORMAL,  +       (GLfloat a,GLfloat b,GLfloat c), (a,b,c)) +CHOOSE(Normal3fv, pfv, 3, VERT_ATTRIB_NORMAL,  +       (const GLfloat *v), (v)) + +CHOOSE(Color4ub, p4ub, 4, VERT_ATTRIB_COLOR0, +	(GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d)) +CHOOSE(Color4ubv, pubv, 4, VERT_ATTRIB_COLOR0,  +	(const GLubyte *v), (v)) +CHOOSE(Color3ub, p3ub, 3, VERT_ATTRIB_COLOR0,  +	(GLubyte a,GLubyte b, GLubyte c), (a,b,c)) +CHOOSE(Color3ubv, pubv, 3, VERT_ATTRIB_COLOR0,  +	(const GLubyte *v), (v)) + +CHOOSE(Color4f, p4f, 4, VERT_ATTRIB_COLOR0,  +	(GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d)) +CHOOSE(Color4fv, pfv, 4, VERT_ATTRIB_COLOR0,  +	(const GLfloat *v), (v)) +CHOOSE(Color3f, p3f, 3, VERT_ATTRIB_COLOR0, +	(GLfloat a,GLfloat b, GLfloat c), (a,b,c)) +CHOOSE(Color3fv, pfv, 3, VERT_ATTRIB_COLOR0, +	(const GLfloat *v), (v)) + + +CHOOSE(SecondaryColor3ubEXT, p3ub, VERT_ATTRIB_COLOR1,  +	(GLubyte a,GLubyte b, GLubyte c), (a,b,c)) +CHOOSE(SecondaryColor3ubvEXT, pubv, VERT_ATTRIB_COLOR1,  +	(const GLubyte *v), (v)) +CHOOSE(SecondaryColor3fEXT, p3f, VERT_ATTRIB_COLOR1, +	(GLfloat a,GLfloat b, GLfloat c), (a,b,c)) +CHOOSE(SecondaryColor3fvEXT, pfv, VERT_ATTRIB_COLOR1, +	(const GLfloat *v), (v)) + +CHOOSE(TexCoord2f, p2f, VERT_ATTRIB_TEX0,  +       (GLfloat a,GLfloat b), (a,b)) +CHOOSE(TexCoord2fv, pfv, VERT_ATTRIB_TEX0,  +       (const GLfloat *v), (v)) +CHOOSE(TexCoord1f, p1f, 
VERT_ATTRIB_TEX0,  +       (GLfloat a), (a)) +CHOOSE(TexCoord1fv, pfv, VERT_ATTRIB_TEX0,  +       (const GLfloat *v), (v)) + +CHOOSE(MultiTexCoord2fARB, pe2f, VERT_ATTRIB_TEX0, +	 (GLenum u,GLfloat a,GLfloat b), (u,a,b)) +CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL, +	(GLenum u,const GLfloat *v), (u,v)) +CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL, +	 (GLenum u,GLfloat a), (u,a)) +CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL, +	(GLenum u,const GLfloat *v), (u,v)) + +CHOOSE(Vertex3f, p3f, VERT_ATTRIB_POS,  +       (GLfloat a,GLfloat b,GLfloat c), (a,b,c)) +CHOOSE(Vertex3fv, pfv, VERT_ATTRIB_POS,  +       (const GLfloat *v), (v)) +CHOOSE(Vertex2f, p2f, VERT_ATTRIB_POS,  +       (GLfloat a,GLfloat b), (a,b)) +CHOOSE(Vertex2fv, pfv, VERT_ATTRIB_POS,  +       (const GLfloat *v), (v)) + + + + + +void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt ) +{ +   vfmt->Color3f = choose_Color3f; +   vfmt->Color3fv = choose_Color3fv; +   vfmt->Color3ub = choose_Color3ub; +   vfmt->Color3ubv = choose_Color3ubv; +   vfmt->Color4f = choose_Color4f; +   vfmt->Color4fv = choose_Color4fv; +   vfmt->Color4ub = choose_Color4ub; +   vfmt->Color4ubv = choose_Color4ubv; +   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT; +   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT; +   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT; +   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT; +   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB; +   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB; +   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB; +   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB; +   vfmt->Normal3f = choose_Normal3f; +   vfmt->Normal3fv = choose_Normal3fv; +   vfmt->TexCoord1f = choose_TexCoord1f; +   vfmt->TexCoord1fv = choose_TexCoord1fv; +   vfmt->TexCoord2f = choose_TexCoord2f; +   vfmt->TexCoord2fv = choose_TexCoord2fv; +   vfmt->Vertex2f = choose_Vertex2f; +   
vfmt->Vertex2fv = choose_Vertex2fv; +   vfmt->Vertex3f = choose_Vertex3f; +   vfmt->Vertex3fv = choose_Vertex3fv; +   vfmt->TexCoord3f = choose_TexCoord3f; +   vfmt->TexCoord3fv = choose_TexCoord3fv; +   vfmt->TexCoord4f = choose_TexCoord4f; +   vfmt->TexCoord4fv = choose_TexCoord4fv; +   vfmt->MultiTexCoord3fARB = choose_MultiTexCoord3fARB; +   vfmt->MultiTexCoord3fvARB = choose_MultiTexCoord3fvARB; +   vfmt->MultiTexCoord4fARB = choose_MultiTexCoord4fARB; +   vfmt->MultiTexCoord4fvARB = choose_MultiTexCoord4fvARB; +   vfmt->Vertex4f = choose_Vertex4f; +   vfmt->Vertex4fv = choose_Vertex4fv; +   vfmt->FogCoordfvEXT = choose_FogCoordfvEXT; +   vfmt->FogCoordfEXT = choose_FogCoordfEXT; +   vfmt->EdgeFlag = choose_EdgeFlag; +   vfmt->EdgeFlagv = choose_EdgeFlagv; +   vfmt->Indexi = choose_Indexi; +   vfmt->Indexiv = choose_Indexiv; +   vfmt->EvalCoord1f = choose_EvalCoord1f; +   vfmt->EvalCoord1fv = choose_EvalCoord1fv; +   vfmt->EvalCoord2f = choose_EvalCoord2f; +   vfmt->EvalCoord2fv = choose_EvalCoord2fv; +   vfmt->EvalMesh1 = choose_EvalMesh1; +   vfmt->EvalMesh2 = choose_EvalMesh2; +   vfmt->EvalPoint1 = choose_EvalPoint1; +   vfmt->EvalPoint2 = choose_EvalPoint2; + +   vfmt->Materialfv = _tnl_Materialfv; +} + + +static struct dynfn *codegen_noop( struct _vb *vb, int key ) +{ +   (void) vb; (void) key; +   return 0; +} + +void _tnl_InitCodegen( struct dfn_generators *gen ) +{ +   gen->Vertex2f = codegen_noop; +   gen->Vertex2fv = codegen_noop; +   gen->Vertex3f = codegen_noop; +   gen->Vertex3fv = codegen_noop; +   gen->Vertex4f = codegen_noop; +   gen->Vertex4fv = codegen_noop; + +   gen->Attr1f = codegen_noop; +   gen->Attr1fv = codegen_noop; +   gen->Attr2f = codegen_noop; +   gen->Attr2fv = codegen_noop; +   gen->Attr3f = codegen_noop; +   gen->Attr3fv = codegen_noop; +   gen->Attr4f = codegen_noop; +   gen->Attr4fv = codegen_noop; +   gen->Attr3ub = codegen_noop; +   gen->Attr3ubv = codegen_noop; +   gen->Attr4ub = codegen_noop; +   gen->Attr4ubv = 
codegen_noop; + +   /* Probably need two versions of this, one for the front end +    * (double dispatch), one for the back end (do the work) -- but +    * will also need a second level of CHOOSE functions? +    *   -- Generate the dispatch layer using the existing templates somehow. +    *   -- Generate the backend and 2nd level choosers here. +    *   -- No need for a chooser on the top level. +    *   -- Can aliasing help -- ie can NVAttr1f == Attr1f/Vertex2f at this level (index is known) +    */ +   gen->NVAttr1f = codegen_noop; +   gen->NVAttr1fv = codegen_noop; +   gen->NVAttr2f = codegen_noop; +   gen->NVAttr2fv = codegen_noop; +   gen->NVAttr3f = codegen_noop; +   gen->NVAttr3fv = codegen_noop; +   gen->NVAttr4f = codegen_noop; +   gen->NVAttr4fv = codegen_noop; + +   gen->MTAttr1f = codegen_noop; +   gen->MTAttr1fv = codegen_noop; +   gen->MTAttr2f = codegen_noop; +   gen->MTAttr2fv = codegen_noop; +   gen->MTAttr3f = codegen_noop; +   gen->MTAttr3fv = codegen_noop; +   gen->MTAttr4f = codegen_noop; +   gen->MTAttr4fv = codegen_noop; + +   if (!getenv("MESA_NO_CODEGEN")) { +#if defined(USE_X86_ASM) +      _tnl_InitX86Codegen( gen ); +#endif + +#if defined(USE_SSE_ASM) +      _tnl_InitSSECodegen( gen ); +#endif + +#if defined(USE_3DNOW_ASM) +#endif + +#if defined(USE_SPARC_ASM) +#endif +   } +} diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h new file mode 100644 index 0000000000..6bfdbe8fe3 --- /dev/null +++ b/src/mesa/tnl/t_vtx_api.h @@ -0,0 +1,234 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+/* Include guard named after this header (t_vtx_api.h).  The previous
+ * guard still carried the radeon driver's name, so it did not identify
+ * this file and used a reserved double-underscore identifier.
+ */
+#ifndef T_VTX_API_H
+#define T_VTX_API_H
+
+#ifdef GLX_DIRECT_RENDERING
+
+/* TNLcontext / TNL_CONTEXT() -- t_vtx_exec.c pulls the same header in
+ * as "tnl/t_context.h".
+ */
+#include "t_context.h"
+
+extern void _tnl_UpdateVtxfmt( GLcontext *ctx );
+extern void _tnl_InitVtxfmt( GLcontext *ctx );
+extern void _tnl_InvalidateVtxfmt( GLcontext *ctx );
+extern void _tnl_DestroyVtxfmt( GLcontext *ctx );
+
+/* Function-pointer shapes used to cast generated code to GL entry
+ * points: pNf = N floats, pNub = N ubytes, p*v = pointer argument,
+ * pe* = leading GLenum argument.
+ */
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+/* Want to keep a cache of these around.  Each is parameterized by
+ * only a single value which has only a small range.  Only expect a
+ * few, so just rescan the list each time?
+ */ +struct dynfn { +   struct dynfn *next, *prev; +   int key; +   char *code; +}; + +struct dfn_lists { +   struct dynfn Vertex2f; +   struct dynfn Vertex2fv; +   struct dynfn Vertex3f; +   struct dynfn Vertex3fv; +   struct dynfn Color4ub; +   struct dynfn Color4ubv; +   struct dynfn Color3ub; +   struct dynfn Color3ubv; +   struct dynfn Color4f; +   struct dynfn Color4fv; +   struct dynfn Color3f; +   struct dynfn Color3fv; +   struct dynfn SecondaryColor3ubEXT; +   struct dynfn SecondaryColor3ubvEXT; +   struct dynfn SecondaryColor3fEXT; +   struct dynfn SecondaryColor3fvEXT; +   struct dynfn Normal3f; +   struct dynfn Normal3fv; +   struct dynfn TexCoord2f; +   struct dynfn TexCoord2fv; +   struct dynfn TexCoord1f; +   struct dynfn TexCoord1fv; +   struct dynfn MultiTexCoord2fARB; +   struct dynfn MultiTexCoord2fvARB; +   struct dynfn MultiTexCoord1fARB; +   struct dynfn MultiTexCoord1fvARB; +}; + +struct _vb; + +struct dfn_generators { +   struct dynfn *(*Vertex2f)( struct _vb *, int ); +   struct dynfn *(*Vertex2fv)( struct _vb *, int ); +   struct dynfn *(*Vertex3f)( struct _vb *, int ); +   struct dynfn *(*Vertex3fv)( struct _vb *, int ); +   struct dynfn *(*Color4ub)( struct _vb *, int ); +   struct dynfn *(*Color4ubv)( struct _vb *, int ); +   struct dynfn *(*Color3ub)( struct _vb *, int ); +   struct dynfn *(*Color3ubv)( struct _vb *, int ); +   struct dynfn *(*Color4f)( struct _vb *, int ); +   struct dynfn *(*Color4fv)( struct _vb *, int ); +   struct dynfn *(*Color3f)( struct _vb *, int ); +   struct dynfn *(*Color3fv)( struct _vb *, int ); +   struct dynfn *(*SecondaryColor3ubEXT)( struct _vb *, int ); +   struct dynfn *(*SecondaryColor3ubvEXT)( struct _vb *, int ); +   struct dynfn *(*SecondaryColor3fEXT)( struct _vb *, int ); +   struct dynfn *(*SecondaryColor3fvEXT)( struct _vb *, int ); +   struct dynfn *(*Normal3f)( struct _vb *, int ); +   struct dynfn *(*Normal3fv)( struct _vb *, int ); +   struct dynfn *(*TexCoord2f)( struct _vb *, int ); 
+   struct dynfn *(*TexCoord2fv)( struct _vb *, int );
+   struct dynfn *(*TexCoord1f)( struct _vb *, int );
+   struct dynfn *(*TexCoord1fv)( struct _vb *, int );
+   struct dynfn *(*MultiTexCoord2fARB)( struct _vb *, int );
+   struct dynfn *(*MultiTexCoord2fvARB)( struct _vb *, int );
+   struct dynfn *(*MultiTexCoord1fARB)( struct _vb *, int );
+   struct dynfn *(*MultiTexCoord1fvARB)( struct _vb *, int );
+};
+
+/* One recorded primitive: vertex range [start, end) plus its mode and
+ * flag bits in 'prim'.
+ */
+struct prim {
+   GLuint start;
+   GLuint end;
+   GLuint prim;
+};
+
+/* Capacity of tnl_vbinfo::primlist.  TNL_MAX_PRIMS is the spelling used
+ * by t_vtx_exec.c; the older _TNL__MAX_PRIMS name is kept as an alias.
+ */
+#define TNL_MAX_PRIMS 64
+#define _TNL__MAX_PRIMS TNL_MAX_PRIMS
+
+
+
+struct tnl_vbinfo {
+   /* Keep these first: referenced from codegen templates:
+    */
+   GLint counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   union { float f; int i; GLubyte ub4[4]; } vertex[16*4];
+
+   GLfloat *attrptr[16];
+   GLuint size[16];
+
+   GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+
+   GLboolean installed;
+   GLboolean recheck;
+
+   GLint vertex_size;
+   GLint initial_counter;
+   GLint nrverts;
+   GLuint vertex_format;
+
+   GLuint installed_vertex_format;
+
+   struct prim primlist[TNL_MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
+};
+
+
+extern void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt );
+
+
+/* Patch the int at CODE+OFFSET: assert it currently holds CHECKVAL,
+ * then overwrite it with NEWVAL.
+ */
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL )	\
+do {						\
+   int *icode = (int *)((CODE)+(OFFSET));	\
+   assert (*icode == (CHECKVAL));		\
+   *icode = (int)(NEWVAL);			\
+} while (0)
+
+
+/* Useful for figuring out the offsets:
+ * scans forward from OFFSET (which must be a modifiable lvalue) until
+ * CHECKVAL is found, reports the offset, patches it and steps past it.
+ */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL )		\
+do {								\
+   while (*(int *)((CODE)+(OFFSET)) != (CHECKVAL)) (OFFSET)++;	\
+   fprintf(stderr, "%s/%d CVAL %x OFFSET %d\n", __FUNCTION__,	\
+	   __LINE__, CHECKVAL, OFFSET);				\
+   *(int *)((CODE)+(OFFSET)) = (int)(NEWVAL);			\
+   (OFFSET) += 4;						\
+} while (0)
+
+/*
+ */
+void _tnl_InitCodegen( struct dfn_generators *gen );
+void _tnl_InitX86Codegen( struct dfn_generators *gen );
+void _tnl_InitSSECodegen( struct 
dfn_generators *gen ); + +void _tnl_copy_to_current( GLcontext *ctx ); + + +/* Defined in tnl_vtxfmt_c.c. + */ +struct dynfn *tnl_makeX86Vertex2f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex2fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4ub( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4ubv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3ub( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3ubv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3ubEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3ubvEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3fEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3fvEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86Normal3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Normal3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord1f( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord1fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord1fARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord1fvARB( TNLcontext *, int ); + + +#endif +#endif diff --git a/src/mesa/tnl/t_vtx_exec.c b/src/mesa/tnl/t_vtx_exec.c new file mode 100644 index 0000000000..8470d6ab35 --- /dev/null +++ b/src/mesa/tnl/t_vtx_exec.c @@ -0,0 +1,636 @@ +/* $XFree86$ */ +/************************************************************************** + 
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+#include "api_noop.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "mem.h"
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
+#include "vtxfmt.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
+
+static void _tnl_FlushVertices( GLcontext *, GLuint );
+
+
+/* Copy the most recently specified attribute values back into
+ * ctx->Current for every attribute flagged in tnl->vertex_format, then
+ * clear FLUSH_UPDATE_CURRENT.  Asserts that the flag was set on entry.
+ */
+void tnl_copy_to_current( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint flag = tnl->vertex_format;
+   GLint i;
+
+   assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+
+   /* attrptr[] is the per-attribute pointer table declared in
+    * t_vtx_api.h and written by the ATTRF fallback macro.
+    */
+   for (i = 0 ; i < 16 ; i++)
+      if (flag & (1<<i))
+	 COPY_4FV( ctx->Current.Attrib[i], tnl->attrptr[i] );
+
+   if (flag & VERT_BIT_INDEX)
+      ctx->Current.Index = tnl->indexptr[0];
+
+   if (flag & VERT_BIT_EDGEFLAG)
+      ctx->Current.EdgeFlag = tnl->edgeflagptr[0];
+
+   /* NOTE(review): 'IM' is not declared anywhere in this function --
+    * this branch looks carried over from the immediate-mode code and
+    * still needs a material source.
+    */
+   if (flag & VERT_BIT_MATERIAL) {
+      _mesa_update_material( ctx,
+			  IM->Material[IM->LastMaterial],
+			  IM->MaterialOrMask );
+
+      tnl->Driver.NotifyMaterialChange( ctx );
+   }
+
+
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
+
+/* Indexed by GL primitive mode: 1 when consecutive runs of that mode
+ * may be merged by optimize_prims().
+ */
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {
+   1,				/* 0 points */
+   1,				/* 1 lines */
+   0,				/* 2 line_loop */
+   0,				/* 3 line_strip */
+   1,				/* 4 tris */
+   0,				/* 5 tri_strip */
+   0,				/* 6 tri_fan */
+   1,				/* 7 quads */
+   0,				/* 8 quadstrip */
+   0,				/* 9 poly */
+};
+
+/* Optimize the primitive list:  ONLY FOR EXECUTE ATM
+ *
+ * Adjacent entries with the same mode are merged when the table above
+ * allows it and the second starts where the first ends.
+ */
+static void optimize_prims( TNLcontext *tnl )
+{
+   int i, j;
+
+   if (tnl->nrprims <= 1)
+      return;
+
+   for (j = 0, i = 1 ; i < tnl->nrprims; i++) {
+      int pj = tnl->primlist[j].prim & 0xf;
+      int pi = tnl->primlist[i].prim & 0xf;
+
+      /* The 0xf mask admits 10..15, which are past the end of the
+       * table, so bound the index before looking it up.
+       */
+      if (pj == pi && pj <= GL_POLYGON && discreet_gl_prim[pj] &&
+	  
tnl->primlist[i].start == tnl->primlist[j].end) { +	 tnl->primlist[j].end = tnl->primlist[i].end; +      } +      else { +	 j++; +	 if (j != i) tnl->primlist[j] = tnl->primlist[i]; +      } +   } + +   tnl->nrprims = j+1; +} + + +/* Bind vertex buffer pointers, run pipeline: + */ +static void flush_prims( TNLcontext *tnl ) +{ +   int i,j; + +   tnl->dma.current.ptr = tnl->dma.current.start +=  +      (tnl->initial_counter - tnl->counter) * tnl->vertex_size * 4;  + +   tnl->tcl.vertex_format = tnl->vertex_format; +   tnl->tcl.aos_components[0] = &tmp; +   tnl->tcl.nr_aos_components = 1; +   tnl->dma.flush = 0; + +   tnl->Driver.RunPipeline( ... ); +    +   tnl->nrprims = 0; +} + + +static void start_prim( TNLcontext *tnl, GLuint mode ) +{ +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s %d\n", __FUNCTION__,  +	      tnl->initial_counter - tnl->counter); + +   tnl->primlist[tnl->nrprims].start = tnl->initial_counter - tnl->counter; +   tnl->primlist[tnl->nrprims].prim = mode; +} + +static void note_last_prim( TNLcontext *tnl, GLuint flags ) +{ +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s %d\n", __FUNCTION__,  +	      tnl->initial_counter - tnl->counter); + +   if (tnl->prim[0] != GL_POLYGON+1) { +      tnl->primlist[tnl->nrprims].prim |= flags; +      tnl->primlist[tnl->nrprims].end = tnl->initial_counter - tnl->counter; + +      if (++tnl->nrprims == TNL_MAX_PRIMS) +	 flush_prims( tnl ); +   } +} + + +static void copy_vertex( TNLcontext *tnl, GLuint n, GLfloat *dst ) +{ +   GLuint i; +   GLfloat *src = (GLfloat *)(tnl->dma.current.address +  +			      tnl->dma.current.ptr +  +			      (tnl->primlist[tnl->nrprims].start + n) *  +			      tnl->vertex_size * 4); + +   if (MESA_VERBOSE & DEBUG_VFMT)  +      fprintf(stderr, "copy_vertex %d\n",  +	      tnl->primlist[tnl->nrprims].start + n); + +   for (i = 0 ; i < tnl->vertex_size; i++) { +      dst[i] = src[i]; +   } +} + +/* NOTE: This actually reads the copied vertices back from uncached 
+ * memory.  Could also use the counter/notify mechanism to populate + * tmp on the fly as vertices are generated.   + */ +static GLuint copy_wrapped_verts( TNLcontext *tnl, GLfloat (*tmp)[15] ) +{ +   GLuint ovf, i; +   GLuint nr = (tnl->initial_counter - tnl->counter) - tnl->primlist[tnl->nrprims].start; + +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr); + +   switch( tnl->prim[0] ) +   { +   case GL_POINTS: +      return 0; +   case GL_LINES: +      ovf = nr&1; +      for (i = 0 ; i < ovf ; i++) +	 copy_vertex( tnl, nr-ovf+i, tmp[i] ); +      return i; +   case GL_TRIANGLES: +      ovf = nr%3; +      for (i = 0 ; i < ovf ; i++) +	 copy_vertex( tnl, nr-ovf+i, tmp[i] ); +      return i; +   case GL_QUADS: +      ovf = nr&3; +      for (i = 0 ; i < ovf ; i++) +	 copy_vertex( tnl, nr-ovf+i, tmp[i] ); +      return i; +   case GL_LINE_STRIP: +      if (nr == 0)  +	 return 0; +      copy_vertex( tnl, nr-1, tmp[0] ); +      return 1; +   case GL_LINE_LOOP: +   case GL_TRIANGLE_FAN: +   case GL_POLYGON: +      if (nr == 0)  +	 return 0; +      else if (nr == 1) { +	 copy_vertex( tnl, 0, tmp[0] ); +	 return 1; +      } else { +	 copy_vertex( tnl, 0, tmp[0] ); +	 copy_vertex( tnl, nr-1, tmp[1] ); +	 return 2; +      } +   case GL_TRIANGLE_STRIP: +      ovf = MIN2( nr-1, 2 ); +      for (i = 0 ; i < ovf ; i++) +	 copy_vertex( tnl, nr-ovf+i, tmp[i] ); +      return i; +   case GL_QUAD_STRIP: +      ovf = MIN2( nr-1, 2 ); +      if (nr > 2) ovf += nr&1; +      for (i = 0 ; i < ovf ; i++) +	 copy_vertex( tnl, nr-ovf+i, tmp[i] ); +      return i; +   default: +      assert(0); +      return 0; +   } +} + + + +/* Extend for vertex-format changes on wrap: + */ +static void wrap_buffer( void ) +{ +   TNLcontext *tnl = tnl->tnl; +   GLfloat tmp[3][15]; +   GLuint i, nrverts; + +   if (MESA_VERBOSE & (DEBUG_VFMT|DEBUG_PRIMS)) +      fprintf(stderr, "%s %d\n", __FUNCTION__,  +	      tnl->initial_counter - tnl->counter); + +   /* Don't 
deal with parity.  *** WONT WORK FOR COMPILE +    */ +   if ((((tnl->initial_counter - tnl->counter) -   +	 tnl->primlist[tnl->nrprims].start) & 1)) { +      tnl->counter++; +      tnl->initial_counter++; +      return; +   } + +   /* Copy vertices out of dma: +    */ +   nrverts = copy_dma_verts( tnl, tmp ); + +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%d vertices to copy\n", nrverts); +    + +   /* Finish the prim at this point: +    */ +   note_last_prim( tnl, 0 ); +   flush_prims( tnl ); + +   /* Reset counter, dmaptr +    */ +   tnl->dmaptr = (int *)(tnl->dma.current.ptr + tnl->dma.current.address); +   tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) /  +      (tnl->vertex_size * 4); +   tnl->counter--; +   tnl->initial_counter = tnl->counter; +   tnl->notify = wrap_buffer; + +   tnl->dma.flush = flush_prims; +   start_prim( tnl, tnl->prim[0] ); + + +   /* Reemit saved vertices  +    * *** POSSIBLY IN NEW FORMAT +    *       --> Can't always extend at end of vertex? +    */ +   for (i = 0 ; i < nrverts; i++) { +      if (MESA_VERBOSE & DEBUG_VERTS) { +	 int j; +	 fprintf(stderr, "re-emit vertex %d to %p\n", i, tnl->dmaptr); +	 if (MESA_VERBOSE & DEBUG_VERBOSE) +	    for (j = 0 ; j < tnl->vertex_size; j++)  +	       fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]); +      } + +      memcpy( tnl->dmaptr, tmp[i], tnl->vertex_size * 4 ); +      tnl->dmaptr += tnl->vertex_size; +      tnl->counter--; +   } +} + + + +/* Always follow data, don't try to predict what's necessary.   + */ +static GLboolean check_vtx_fmt( GLcontext *ctx ) +{ +   TNLcontext *tnl = TNL_CONTEXT(ctx); + +   if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)  +      ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT ); +    + +   TNL_NEWPRIM(tnl); +   tnl->vertex_format = VERT_BIT_POS; +   tnl->prim = &ctx->Driver.CurrentExecPrimitive; + + +   /* Currently allow the full 4 components per attrib.  
Can use the +    * mechanism from radeon driver color handling to reduce this (and +    * also to store ubyte colors where these are incoming).  This +    * won't work for compile mode. +    * +    * Only adding components when they are first received eliminates +    * the need for displaylist fixup, as there are no 'empty' slots +    * at the start of buffers.   +    */ +   for (i = 0 ; i < 16 ; i++) { +      if (ind & (1<<i)) { +	 tnl->attribptr[i] = &tnl->vertex[tnl->vertex_size].f; +	 tnl->vertex_size += 4; +	 tnl->attribptr[i][0] = ctx->Current.Attrib[i][0]; +	 tnl->attribptr[i][1] = ctx->Current.Attrib[i][1]; +	 tnl->attribptr[i][2] = ctx->Current.Attrib[i][2]; +	 tnl->attribptr[i][3] = ctx->Current.Attrib[i][3]; +      } +      else +	 tnl->attribptr[i] = ctx->Current.Attrib[i]; +   } + +   /* Edgeflag, Index: +    */ +   for (i = 16 ; i < 18 ; i++) +      ; + +   /* Materials: +    */ +   for (i = 18 ; i < 28 ; i++) +      ; + +   /* Eval: +    */ +   for (i = 28 ; i < 29 ; i++) +      ; +	    + +   if (tnl->installed_vertex_format != tnl->vertex_format) { +      if (MESA_VERBOSE & DEBUG_VFMT) +	 fprintf(stderr, "reinstall on vertex_format change\n"); +      _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt ); +      tnl->installed_vertex_format = tnl->vertex_format; +   } + +   return GL_TRUE; +} + + +void _tnl_InvalidateVtxfmt( GLcontext *ctx ) +{ +   tnl->recheck = GL_TRUE; +   tnl->fell_back = GL_FALSE; +} + + + + +static void _tnl_ValidateVtxfmt( GLcontext *ctx ) +{ +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s\n", __FUNCTION__); + +   if (ctx->Driver.NeedFlush) +      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush ); + +   tnl->recheck = GL_FALSE; + +   if (check_vtx_fmt( ctx )) { +      if (!tnl->installed) { +	 if (MESA_VERBOSE & DEBUG_VFMT) +	    fprintf(stderr, "reinstall (new install)\n"); + +	 _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt ); +	 ctx->Driver.FlushVertices = _tnl_FlushVertices; +	 tnl->installed = GL_TRUE; +      } 
+      else +	 fprintf(stderr, "%s: already installed", __FUNCTION__); +   }  +   else { +      if (MESA_VERBOSE & DEBUG_VFMT) +	 fprintf(stderr, "%s: failed\n", __FUNCTION__); + +      if (tnl->installed) { +	 if (tnl->tnl->dma.flush) +	    tnl->tnl->dma.flush( tnl->tnl ); +	 _tnl_wakeup_exec( ctx ); +	 tnl->installed = GL_FALSE; +      } +   }       +} + + + + + +/* Begin/End + */ +static void _tnl_Begin( GLenum mode ) +{ +   GLcontext *ctx = tnl->context; +   TNLcontext *tnl = tnl->tnl; +    +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s\n", __FUNCTION__); + +   if (mode > GL_POLYGON) { +      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" ); +      return; +   } + +   if (tnl->prim[0] != GL_POLYGON+1) { +      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" ); +      return; +   } +    +   if (ctx->NewState)  +      _mesa_update_state( ctx ); + +   if (tnl->recheck)  +      _tnl_ValidateVtxfmt( ctx ); + +   if (tnl->dma.flush && tnl->counter < 12) { +      if (MESA_VERBOSE & DEBUG_VFMT) +	 fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__); +      flush_prims( tnl ); +   } + +   if (!tnl->dma.flush) { +      if (tnl->dma.current.ptr + 12*tnl->vertex_size*4 >  +	  tnl->dma.current.end) { +	 TNL_NEWPRIM( tnl ); +	 _tnl_RefillCurrentDmaRegion( tnl ); +      } + +      tnl->dmaptr = (int *)(tnl->dma.current.address + tnl->dma.current.ptr); +      tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) /  +	 (tnl->vertex_size * 4); +      tnl->counter--; +      tnl->initial_counter = tnl->counter; +      tnl->notify = wrap_buffer; +      tnl->dma.flush = flush_prims; +      tnl->context->Driver.NeedFlush |= FLUSH_STORED_VERTICES; +   } +    +    +   tnl->prim[0] = mode; +   start_prim( tnl, mode | PRIM_BEGIN ); +} + + + + + +static void _tnl_End( void ) +{ +   TNLcontext *tnl = tnl->tnl; +   GLcontext *ctx = tnl->context; + +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s\n", __FUNCTION__); + +   if (tnl->prim[0] == 
GL_POLYGON+1) { +      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" ); +      return; +   } +	   +   note_last_prim( tnl, PRIM_END ); +   tnl->prim[0] = GL_POLYGON+1; +} + + +static void _tnl_FlushVertices( GLcontext *ctx, GLuint flags ) +{ +   if (MESA_VERBOSE & DEBUG_VFMT) +      fprintf(stderr, "%s\n", __FUNCTION__); + +   assert(tnl->installed); + +   if (flags & FLUSH_UPDATE_CURRENT) { +      _tnl_copy_to_current( ctx ); +      if (MESA_VERBOSE & DEBUG_VFMT) +	 fprintf(stderr, "reinstall on update_current\n"); +      _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt ); +      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT; +   } + +   if (flags & FLUSH_STORED_VERTICES) { +      TNLcontext *tnl = TNL_CONTEXT( ctx ); +      assert (tnl->dma.flush == 0 || +	      tnl->dma.flush == flush_prims); +      if (tnl->dma.flush == flush_prims) +	 flush_prims( TNL_CONTEXT( ctx ) ); +      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES; +   } +} + + + +/* At this point, don't expect very many versions of each function to + * be generated, so not concerned about freeing them? 
+ */ + + +static void _tnl_InitVtxfmt( GLcontext *ctx ) +{ +   GLvertexformat *vfmt = &(tnl->vtxfmt); + +   MEMSET( vfmt, 0, sizeof(GLvertexformat) ); + +   /* Hook in chooser functions for codegen, etc: +    */ +   _tnl_InitVtxfmtChoosers( vfmt ); + +   /* Handled fully in supported states, but no codegen: +    */ +   vfmt->ArrayElement = _ae_loopback_array_elt;	        /* generic helper */ +   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */ +   vfmt->Begin = _tnl_Begin; +   vfmt->End = _tnl_End; +    +   tnl->context = ctx; +   tnl->tnl = TNL_CONTEXT(ctx); +   tnl->prim = &ctx->Driver.CurrentExecPrimitive; +   tnl->primflags = 0; + +   make_empty_list( &tnl->dfn_cache.Vertex2f ); +   make_empty_list( &tnl->dfn_cache.Vertex2fv ); +   make_empty_list( &tnl->dfn_cache.Vertex3f ); +   make_empty_list( &tnl->dfn_cache.Vertex3fv ); +   make_empty_list( &tnl->dfn_cache.Color4ub ); +   make_empty_list( &tnl->dfn_cache.Color4ubv ); +   make_empty_list( &tnl->dfn_cache.Color3ub ); +   make_empty_list( &tnl->dfn_cache.Color3ubv ); +   make_empty_list( &tnl->dfn_cache.Color4f ); +   make_empty_list( &tnl->dfn_cache.Color4fv ); +   make_empty_list( &tnl->dfn_cache.Color3f ); +   make_empty_list( &tnl->dfn_cache.Color3fv ); +   make_empty_list( &tnl->dfn_cache.SecondaryColor3fEXT ); +   make_empty_list( &tnl->dfn_cache.SecondaryColor3fvEXT ); +   make_empty_list( &tnl->dfn_cache.SecondaryColor3ubEXT ); +   make_empty_list( &tnl->dfn_cache.SecondaryColor3ubvEXT ); +   make_empty_list( &tnl->dfn_cache.Normal3f ); +   make_empty_list( &tnl->dfn_cache.Normal3fv ); +   make_empty_list( &tnl->dfn_cache.TexCoord2f ); +   make_empty_list( &tnl->dfn_cache.TexCoord2fv ); +   make_empty_list( &tnl->dfn_cache.TexCoord1f ); +   make_empty_list( &tnl->dfn_cache.TexCoord1fv ); +   make_empty_list( &tnl->dfn_cache.MultiTexCoord2fARB ); +   make_empty_list( &tnl->dfn_cache.MultiTexCoord2fvARB ); +   make_empty_list( &tnl->dfn_cache.MultiTexCoord1fARB ); +   make_empty_list( 
&tnl->dfn_cache.MultiTexCoord1fvARB ); + +   _tnl_InitCodegen( &tnl->codegen ); +} + +static void free_funcs( struct dynfn *l ) +{ +   struct dynfn *f, *tmp; +   foreach_s (f, tmp, l) { +      remove_from_list( f ); +      ALIGN_FREE( f->code ); +      FREE( f ); +   } +} + + +static void _tnl_DestroyVtxfmt( GLcontext *ctx ) +{ +   count_funcs(); +   free_funcs( &tnl->dfn_cache.Vertex2f ); +   free_funcs( &tnl->dfn_cache.Vertex2fv ); +   free_funcs( &tnl->dfn_cache.Vertex3f ); +   free_funcs( &tnl->dfn_cache.Vertex3fv ); +   free_funcs( &tnl->dfn_cache.Color4ub ); +   free_funcs( &tnl->dfn_cache.Color4ubv ); +   free_funcs( &tnl->dfn_cache.Color3ub ); +   free_funcs( &tnl->dfn_cache.Color3ubv ); +   free_funcs( &tnl->dfn_cache.Color4f ); +   free_funcs( &tnl->dfn_cache.Color4fv ); +   free_funcs( &tnl->dfn_cache.Color3f ); +   free_funcs( &tnl->dfn_cache.Color3fv ); +   free_funcs( &tnl->dfn_cache.SecondaryColor3ubEXT ); +   free_funcs( &tnl->dfn_cache.SecondaryColor3ubvEXT ); +   free_funcs( &tnl->dfn_cache.SecondaryColor3fEXT ); +   free_funcs( &tnl->dfn_cache.SecondaryColor3fvEXT ); +   free_funcs( &tnl->dfn_cache.Normal3f ); +   free_funcs( &tnl->dfn_cache.Normal3fv ); +   free_funcs( &tnl->dfn_cache.TexCoord2f ); +   free_funcs( &tnl->dfn_cache.TexCoord2fv ); +   free_funcs( &tnl->dfn_cache.TexCoord1f ); +   free_funcs( &tnl->dfn_cache.TexCoord1fv ); +   free_funcs( &tnl->dfn_cache.MultiTexCoord2fARB ); +   free_funcs( &tnl->dfn_cache.MultiTexCoord2fvARB ); +   free_funcs( &tnl->dfn_cache.MultiTexCoord1fARB ); +   free_funcs( &tnl->dfn_cache.MultiTexCoord1fvARB ); +} + diff --git a/src/mesa/tnl/t_vtx_sse.c b/src/mesa/tnl/t_vtx_sse.c new file mode 100644 index 0000000000..240d6cf8b9 --- /dev/null +++ b/src/mesa/tnl/t_vtx_sse.c @@ -0,0 +1,93 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + *   Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <stdio.h> +#include <assert.h> +#include "mem.h"  +#include "simple_list.h"  +#include "t_vtx_api.h" + +#if defined(USE_SSE_ASM) + +/* Build specialized versions of the immediate calls on the fly for + * the current state.  ???P4 SSE2 versions??? + */ + + +static struct dynfn *makeSSENormal3fv( struct _vb *vb, int key ) +{ +   /* Requires P4 (sse2?) 
+    */ +   static unsigned char temp[] = { +      0x8b, 0x44, 0x24, 0x04,          	/*  mov    0x4(%esp,1),%eax */ +      0xba, 0x78, 0x56, 0x34, 0x12,   	/*  mov    $0x12345678,%edx */ +      0xf3, 0x0f, 0x7e, 0x00,          	/*  movq   (%eax),%xmm0 */ +      0x66, 0x0f, 0x6e, 0x48, 0x08,    	/*  movd   0x8(%eax),%xmm1 */ +      0x66, 0x0f, 0xd6, 0x42, 0x0c,    	/*  movq   %xmm0,0xc(%edx) */ +      0x66, 0x0f, 0x7e, 0x4a, 0x14,    	/*  movd   %xmm1,0x14(%edx) */ +      0xc3,                   	        /*  ret     */ +   }; + + +   struct dynfn *dfn = MALLOC_STRUCT( dynfn ); +   insert_at_head( &vb->dfn_cache.Normal3fv, dfn ); +   dfn->key = key; + +   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +   memcpy (dfn->code, temp, sizeof(temp)); +   FIXUP(dfn->code, 5, 0x0, (int)vb->normalptr);  +   return dfn; +} + +void _tnl_InitSSECodegen( struct dfn_generators *gen ) +{ +   /* Need to:  +    *    - check kernel sse support +    *    - check p4/sse2 +    */ +   (void) makeSSENormal3fv; +} + + +#else  + +void _tnl_InitSSECodegen( struct dfn_generators *gen ) +{ +   (void) gen; +} + +#endif + + + + diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c new file mode 100644 index 0000000000..4713a325bf --- /dev/null +++ b/src/mesa/tnl/t_vtx_x86.c @@ -0,0 +1,727 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + *   Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <stdio.h> +#include <assert.h> +#include "mem.h"  +#include "mmath.h"  +#include "simple_list.h"  +#include "tnl_vtxfmt.h" + +#if defined(USE_X86_ASM) + +/* Build specialized versions of the immediate calls on the fly for + * the current state.  Generic x86 versions. 
+ */ + +struct dynfn *tnl_makeX86Vertex3f( TNLcontext *tnl, int key ) +{ +   struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + +   if (RADEON_DEBUG & DEBUG_CODEGEN) +      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + +   switch (tnl->vertex_size) { +   case 4: { +      static  char temp[] = { +	 0x8b, 0x0d, 0,0,0,0,    	/* mov    DMAPTR,%ecx */ +	 0x8b, 0x44, 0x24, 0x04,       	/* mov    0x4(%esp,1),%eax */ +	 0x8b, 0x54, 0x24, 0x08,       	/* mov    0x8(%esp,1),%edx */ +	 0x89, 0x01,                	/* mov    %eax,(%ecx) */ +	 0x89, 0x51, 0x04,             	/* mov    %edx,0x4(%ecx) */ +	 0x8b, 0x44, 0x24, 0x0c,       	/* mov    0xc(%esp,1),%eax */ +	 0x8b, 0x15, 0,0,0,0,    	/* mov    VERTEX[3],%edx */ +	 0x89, 0x41, 0x08,             	/* mov    %eax,0x8(%ecx) */ +	 0x89, 0x51, 0x0c,             	/* mov    %edx,0xc(%ecx) */ +	 0xa1, 0, 0, 0, 0,       	/* mov    COUNTER,%eax */ +	 0x83, 0xc1, 0x10,             	/* add    $0x10,%ecx */ +	 0x48,                   	/* dec    %eax */ +	 0x89, 0x0d, 0,0,0,0,    	/* mov    %ecx,DMAPTR */ +	 0xa3, 0, 0, 0, 0,      	/* mov    %eax,COUNTER */ +	 0x74, 0x01,                	/* je     +1 */ +	 0xc3,                   	/* ret     */ +	 0xff, 0x25, 0,0,0,0    	/* jmp    *NOTIFY */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      FIXUP(dfn->code, 2, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 25, 0x0, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 36, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 46, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 51, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 60, 0x0, (int)&tnl->notify); +      break; +   } +   case 6: { +      static  char temp[] = { +	 0x57,                   	/* push   %edi */ +	 0x8b, 0x3d, 0, 0, 0, 0,    	/* mov    DMAPTR,%edi */ +	 0x8b, 0x44, 0x24, 0x8,       	/* mov    0x8(%esp,1),%eax */ +	 0x8b, 0x54, 0x24, 0xc,       	/* mov    0xc(%esp,1),%edx */ +	 0x8b, 0x4c, 0x24, 0x10,    
   	/* mov    0x10(%esp,1),%ecx */ +	 0x89, 0x07,                	/* mov    %eax,(%edi) */ +	 0x89, 0x57, 0x04,             	/* mov    %edx,0x4(%edi) */ +	 0x89, 0x4f, 0x08,             	/* mov    %ecx,0x8(%edi) */ +	 0xa1, 0, 0, 0, 0,       	/* mov    VERTEX[3],%eax */ +	 0x8b, 0x15, 0, 0, 0, 0,    	/* mov    VERTEX[4],%edx */ +	 0x8b, 0x0d, 0, 0, 0, 0,    	/* mov    VERTEX[5],%ecx */ +	 0x89, 0x47, 0x0c,             	/* mov    %eax,0xc(%edi) */ +	 0x89, 0x57, 0x10,             	/* mov    %edx,0x10(%edi) */ +	 0x89, 0x4f, 0x14,             	/* mov    %ecx,0x14(%edi) */ +	 0x83, 0xc7, 0x18,             	/* add    $0x18,%edi */ +	 0xa1, 0, 0, 0, 0,       	/* mov    COUNTER,%eax */ +	 0x89, 0x3d, 0, 0, 0, 0,    	/* mov    %edi,DMAPTR */ +	 0x48,                   	/* dec    %eax */ +	 0x5f,                   	/* pop    %edi */ +	 0xa3, 0, 0, 0, 0,       	/* mov    %eax,COUNTER */ +	 0x74, 0x01,                	/* je     +1 */ +	 0xc3,                   	/* ret     */ +	 0xff, 0x25, 0,0,0,0,    	/* jmp    *NOTIFY */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      FIXUP(dfn->code, 3, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 28, 0x0, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 34, 0x0, (int)&tnl->vertex[4]); +      FIXUP(dfn->code, 40, 0x0, (int)&tnl->vertex[5]); +      FIXUP(dfn->code, 57, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 63, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 70, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 79, 0x0, (int)&tnl->notify); +      break; +   } +   default: { +      /* Repz convenient as it's possible to emit code for any size +       * vertex with little tweaking.  Might as well read vertsize +       * though, and have only one of these. 
+       */ +      static  char temp[] = { +	 0x57,                     	/* push   %edi */ +	 0x56,                     	/* push   %esi */ +	 0xbe, 0, 0, 0, 0,         	/* mov    $VERTEX+3,%esi */ +	 0x8b, 0x3d, 0, 0, 0, 0,    	/* mov    DMAPTR,%edi */ +	 0x8b, 0x44, 0x24, 0x0c,        /* mov    0x0c(%esp,1),%eax */ +	 0x8b, 0x54, 0x24, 0x10,        /* mov    0x10(%esp,1),%edx */ +	 0x8b, 0x4c, 0x24, 0x14,        /* mov    0x14(%esp,1),%ecx */ +	 0x89, 0x07,                	/* mov    %eax,(%edi) */ +	 0x89, 0x57, 0x04,             	/* mov    %edx,0x4(%edi) */ +	 0x89, 0x4f, 0x08,             	/* mov    %ecx,0x8(%edi) */ +	 0x83, 0xc7, 0x0c,             	/* add    $0xc,%edi */ +	 0xb9, 0, 0, 0, 0,         	/* mov    $VERTSIZE-3,%ecx */ +	 0xf3, 0xa5,                	/* repz movsl %ds:(%esi),%es:(%edi)*/ +	 0xa1, 0, 0, 0, 0,         	/* mov    COUNTER,%eax */ +	 0x89, 0x3d, 0, 0, 0, 0,    	/* mov    %edi,DMAPTR */ +	 0x48,                     	/* dec    %eax */ +	 0xa3, 0, 0, 0, 0,          	/* mov    %eax,COUNTER */ +	 0x5e,                     	/* pop    %esi */ +	 0x5f,                     	/* pop    %edi */ +	 0x74, 0x01,                	/* je     +1 */ +	 0xc3,                     	/* ret     */ +	 0xff, 0x25, 0, 0, 0, 0    	/* jmp    NOTIFY */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      FIXUP(dfn->code, 3, 0x0, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 9, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 37, 0x0, tnl->vertex_size-3); +      FIXUP(dfn->code, 44, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 50, 0x0, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 56, 0x0, (int)&tnl->counter); +      FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify); +   break; +   } +   } + +   insert_at_head( &tnl->dfn_cache.Vertex3f, dfn ); +   dfn->key = key; +   return dfn; +} + + + +struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *tnl, int key ) +{ +   struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + +   
if (TNL_DEBUG & DEBUG_CODEGEN) +      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + +   switch (tnl->vertex_size) { +   case 6: { +      static  char temp[] = { +	 0xa1, 0x00, 0x00, 0, 0,       	/* mov    0x0,%eax */ +	 0x8b, 0x4c, 0x24, 0x04,       	/* mov    0x4(%esp,1),%ecx */ +	 0x8b, 0x11,                	/* mov    (%ecx),%edx */ +	 0x89, 0x10,                	/* mov    %edx,(%eax) */ +	 0x8b, 0x51, 0x04,             	/* mov    0x4(%ecx),%edx */ +	 0x8b, 0x49, 0x08,             	/* mov    0x8(%ecx),%ecx */ +	 0x89, 0x50, 0x04,             	/* mov    %edx,0x4(%eax) */ +	 0x89, 0x48, 0x08,             	/* mov    %ecx,0x8(%eax) */ +	 0x8b, 0x15, 0x1c, 0, 0, 0,    	/* mov    0x1c,%edx */ +	 0x8b, 0x0d, 0x20, 0, 0, 0,    	/* mov    0x20,%ecx */ +	 0x89, 0x50, 0x0c,             	/* mov    %edx,0xc(%eax) */ +	 0x89, 0x48, 0x10,             	/* mov    %ecx,0x10(%eax) */ +	 0x8b, 0x15, 0x24, 0, 0, 0,    	/* mov    0x24,%edx */ +	 0x89, 0x50, 0x14,             	/* mov    %edx,0x14(%eax) */ +	 0x83, 0xc0, 0x18,             	/* add    $0x18,%eax */ +	 0xa3, 0x00, 0x00, 0, 0,       	/* mov    %eax,0x0 */ +	 0xa1, 0x04, 0x00, 0, 0,       	/* mov    0x4,%eax */ +	 0x48,                   	/* dec    %eax */ +	 0xa3, 0x04, 0x00, 0, 0,       	/* mov    %eax,0x4 */ +	 0x74, 0x01,                	/* je     2a4 <.f11> */ +	 0xc3,                   	/* ret     */ +	 0xff, 0x25, 0x08, 0, 0, 0,    	/* jmp    *0x8 */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); +      FIXUP(dfn->code, 45, 0x00000024, (int)&tnl->vertex[5]); +      FIXUP(dfn->code, 56, 0x00000000, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 61, 0x00000004, (int)&tnl->counter); +      FIXUP(dfn->code, 67, 0x00000004, (int)&tnl->counter); +      FIXUP(dfn->code, 76, 
0x00000008, (int)&tnl->notify); +      break; +   } +    + +   case 8: { +      static  char temp[] = { +	 0xa1, 0x00, 0x00, 0, 0,       	/* mov    0x0,%eax */ +	 0x8b, 0x4c, 0x24, 0x04,       	/* mov    0x4(%esp,1),%ecx */ +	 0x8b, 0x11,                	/* mov    (%ecx),%edx */ +	 0x89, 0x10,                	/* mov    %edx,(%eax) */ +	 0x8b, 0x51, 0x04,             	/* mov    0x4(%ecx),%edx */ +	 0x8b, 0x49, 0x08,             	/* mov    0x8(%ecx),%ecx */ +	 0x89, 0x50, 0x04,             	/* mov    %edx,0x4(%eax) */ +	 0x89, 0x48, 0x08,             	/* mov    %ecx,0x8(%eax) */ +	 0x8b, 0x15, 0x1c, 0, 0, 0,    	/* mov    0x1c,%edx */ +	 0x8b, 0x0d, 0x20, 0, 0, 0,    	/* mov    0x20,%ecx */ +	 0x89, 0x50, 0x0c,             	/* mov    %edx,0xc(%eax) */ +	 0x89, 0x48, 0x10,             	/* mov    %ecx,0x10(%eax) */ +	 0x8b, 0x15, 0x1c, 0, 0, 0,    	/* mov    0x1c,%edx */ +	 0x8b, 0x0d, 0x20, 0, 0, 0,    	/* mov    0x20,%ecx */ +	 0x89, 0x50, 0x14,             	/* mov    %edx,0x14(%eax) */ +	 0x89, 0x48, 0x18,             	/* mov    %ecx,0x18(%eax) */ +	 0x8b, 0x15, 0x24, 0, 0, 0,    	/* mov    0x24,%edx */ +	 0x89, 0x50, 0x1c,             	/* mov    %edx,0x1c(%eax) */ +	 0x83, 0xc0, 0x20,             	/* add    $0x20,%eax */ +	 0xa3, 0x00, 0x00, 0, 0,       	/* mov    %eax,0x0 */ +	 0xa1, 0x04, 0x00, 0, 0,       	/* mov    0x4,%eax */ +	 0x48,                   	/* dec    %eax */ +	 0xa3, 0x04, 0x00, 0, 0,       	/* mov    %eax,0x4 */ +	 0x74, 0x01,                	/* je     2a4 <.f11> */ +	 0xc3,                   	/* ret     */ +	 0xff, 0x25, 0x08, 0, 0, 0,    	/* jmp    *0x8 */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); +      FIXUP(dfn->code, 45, 0x0000001c, (int)&tnl->vertex[5]); +      FIXUP(dfn->code, 51, 
0x00000020, (int)&tnl->vertex[6]); +      FIXUP(dfn->code, 63, 0x00000024, (int)&tnl->vertex[7]); +      FIXUP(dfn->code, 74, 0x00000000, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 79, 0x00000004, (int)&tnl->counter); +      FIXUP(dfn->code, 85, 0x00000004, (int)&tnl->counter); +      FIXUP(dfn->code, 94, 0x00000008, (int)&tnl->notify); +      break; +   } +    + + +   default: { +      /* Repz convenient as it's possible to emit code for any size +       * vertex with little tweaking.  Might as well read vertsize +       * though, and have only one of these. +       */ +      static  char temp[] = { +	 0x8b, 0x54, 0x24, 0x04,        /* mov    0x4(%esp,1),%edx */ +	 0x57,                   	/* push   %edi */ +	 0x56,                   	/* push   %esi */ +	 0x8b, 0x3d, 1,1,1,1,    	/* mov    DMAPTR,%edi */ +	 0x8b, 0x02,                	/* mov    (%edx),%eax */ +	 0x8b, 0x4a, 0x04,             	/* mov    0x4(%edx),%ecx */ +	 0x8b, 0x72, 0x08,             	/* mov    0x8(%edx),%esi */ +	 0x89, 0x07,                	/* mov    %eax,(%edi) */ +	 0x89, 0x4f, 0x04,             	/* mov    %ecx,0x4(%edi) */ +	 0x89, 0x77, 0x08,             	/* mov    %esi,0x8(%edi) */ +	 0x83, 0xc7, 0x0c,             	/* add    $0xc,%edi */ +	 0xb9, 0x06, 0x00, 0x00, 0x00,  /* mov    $VERTSIZE-3,%ecx */ +	 0xbe, 0x58, 0x00, 0x00, 0x00,	/* mov    $VERTEX[3],%esi */ +	 0xf3, 0xa5,                	/* repz movsl %ds:(%esi),%es:(%edi)*/ +	 0x89, 0x3d, 1, 1, 1, 1,    	/* mov    %edi,DMAPTR */ +	 0xa1, 2, 2, 2, 2,       	/* mov    COUNTER,%eax */ +	 0x5e,                   	/* pop    %esi */ +	 0x5f,                   	/* pop    %edi */ +	 0x48,                   	/* dec    %eax */ +	 0xa3, 2, 2, 2, 2,       	/* mov    %eax,COUNTER */ +	 0x74, 0x01,                	/* je     +1 */ +	 0xc3,                     	/* ret     */ +	 0xff, 0x25, 0, 0, 0, 0    	/* jmp    NOTIFY */ +      }; + +      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +      memcpy (dfn->code, temp, sizeof(temp)); +      
FIXUP(dfn->code, 8, 0x01010101, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 32, 0x00000006, tnl->vertex_size-3); +      FIXUP(dfn->code, 37, 0x00000058, (int)&tnl->vertex[3]); +      FIXUP(dfn->code, 45, 0x01010101, (int)&tnl->dmaptr); +      FIXUP(dfn->code, 50, 0x02020202, (int)&tnl->counter); +      FIXUP(dfn->code, 58, 0x02020202, (int)&tnl->counter); +      FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify); +   break; +   } +   } + +   insert_at_head( &tnl->dfn_cache.Vertex3fv, dfn ); +   dfn->key = key; +   return dfn; +} + + +struct dynfn *tnl_makeX86Normal3fv( TNLcontext *tnl, int key ) +{ +   static  char temp[] = { +      0x8b, 0x44, 0x24, 0x04,          	/* mov    0x4(%esp,1),%eax */ +      0xba, 0, 0, 0, 0,         	/* mov    $DEST,%edx */ +      0x8b, 0x08,                	/* mov    (%eax),%ecx */ +      0x89, 0x0a,                	/* mov    %ecx,(%edx) */ +      0x8b, 0x48, 0x04,             	/* mov    0x4(%eax),%ecx */ +      0x89, 0x4a, 0x04,             	/* mov    %ecx,0x4(%edx) */ +      0x8b, 0x48, 0x08,             	/* mov    0x8(%eax),%ecx */ +      0x89, 0x4a, 0x08,             	/* mov    %ecx,0x8(%edx) */ +      0xc3,                     	/* ret    */ +   }; + +   struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + +   if (TNL_DEBUG & DEBUG_CODEGEN) +      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + +   insert_at_head( &tnl->dfn_cache.Normal3fv, dfn ); +   dfn->key = key; +   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +   memcpy (dfn->code, temp, sizeof(temp)); +   FIXUP(dfn->code, 5, 0x0, (int)tnl->normalptr);  +   return dfn; +} + +struct dynfn *tnl_makeX86Normal3f( TNLcontext *tnl, int key ) +{ +   static  char temp[] = { +      0xba, 0x78, 0x56, 0x34, 0x12,    	/*  mov    $DEST,%edx */ +      0x8b, 0x44, 0x24, 0x04,          	/*  mov    0x4(%esp,1),%eax */ +      0x89, 0x02,                	/*  mov    %eax,(%edx) */ +      0x8b, 0x44, 0x24, 0x08,          	/*  mov    0x8(%esp,1),%eax */ +      0x89, 0x42, 0x04,             	/*  mov    
%eax,0x4(%edx) */ +      0x8b, 0x44, 0x24, 0x0c,          	/*  mov    0xc(%esp,1),%eax */ +      0x89, 0x42, 0x08,             	/*  mov    %eax,0x8(%edx) */ +      0xc3,                     	/*  ret     */ +   }; + +   struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + +   if (TNL_DEBUG & DEBUG_CODEGEN) +      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + +   insert_at_head( &tnl->dfn_cache.Normal3f, dfn ); +   dfn->key = key; +   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); +   memcpy (dfn->code, temp, sizeof(temp)); +   FIXUP(dfn->code, 1, 0x12345678, (int)tnl->normalptr);  +   return dfn; +} +

/*
 * Notes shared by the generators below:
 *
 *  - Each generator copies a fixed template of x86 machine code into a
 *    16-byte aligned buffer and then calls FIXUP() on the copy.  FIXUP is
 *    defined earlier in this file; from the way it is used here it appears
 *    to overwrite the 32-bit placeholder found at the given byte offset
 *    with the supplied address -- confirm against its definition.
 *
 *  - The templates are declared 'const unsigned char' because they hold
 *    opcode bytes above 0x7f, which do not fit in a signed 'char'.
 *
 *  - The emitted stubs fetch their arguments from the stack (4(%esp),
 *    8(%esp), ...), as shown in the per-instruction comments.
 */

/*
 * Build and cache the stub for the Color4ubv entrypoint.
 *
 * With TNL_CP_VC_FRMT_PKCOLOR set in 'key' the stub copies the four
 * bytes addressed by its argument to tnl->ubytecolorptr.  Otherwise it
 * converts each of the four bytes through _mesa_ubyte_to_float_color_tab
 * and stores the four results at tnl->floatcolorptr + 0, 4, 8 and 12.
 *
 * Returns the new dynfn, already linked into tnl->dfn_cache.Color4ubv.
 */
struct dynfn *tnl_makeX86Color4ubv( TNLcontext *tnl, int key )
{
   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
   insert_at_head( &tnl->dfn_cache.Color4ubv, dfn );
   dfn->key = key;

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   if (key & TNL_CP_VC_FRMT_PKCOLOR) {
      static const unsigned char packed_code[] = {
         0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
         0xba, 0x78, 0x56, 0x34, 0x12,        /* mov    $DEST,%edx */
         0x8b, 0x00,                          /* mov    (%eax),%eax */
         0x89, 0x02,                          /* mov    %eax,(%edx) */
         0xc3,                                /* ret */
      };

      dfn->code = ALIGN_MALLOC( sizeof(packed_code), 16 );
      memcpy (dfn->code, packed_code, sizeof(packed_code));
      FIXUP(dfn->code, 5, 0x12345678, (int)tnl->ubytecolorptr);
      return dfn;
   }
   else {
      static const unsigned char float_code[] = {
         0x53,                                /* push   %ebx */
         0xba, 0x00, 0x00, 0x00, 0x00,        /* mov    $TABLE,%edx */
         0x31, 0xc0,                          /* xor    %eax,%eax */
         0x31, 0xc9,                          /* xor    %ecx,%ecx */
         0x8b, 0x5c, 0x24, 0x08,              /* mov    0x8(%esp,1),%ebx */
         0x8b, 0x1b,                          /* mov    (%ebx),%ebx */
         0x88, 0xd8,                          /* mov    %bl,%al */
         0x88, 0xf9,                          /* mov    %bh,%cl */
         0x8b, 0x04, 0x82,                    /* mov    (%edx,%eax,4),%eax */
         0x8b, 0x0c, 0x8a,                    /* mov    (%edx,%ecx,4),%ecx */
         0xa3, 0xaf, 0xbe, 0xad, 0xde,        /* mov    %eax,DEST */
         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,  /* mov    %ecx,DEST+4 */
         0x31, 0xc0,                          /* xor    %eax,%eax */
         0x31, 0xc9,                          /* xor    %ecx,%ecx */
         0xc1, 0xeb, 0x10,                    /* shr    $0x10,%ebx */
         0x88, 0xd8,                          /* mov    %bl,%al */
         0x88, 0xf9,                          /* mov    %bh,%cl */
         0x8b, 0x04, 0x82,                    /* mov    (%edx,%eax,4),%eax */
         0x8b, 0x0c, 0x8a,                    /* mov    (%edx,%ecx,4),%ecx */
         0xa3, 0xaf, 0xbe, 0xad, 0xde,        /* mov    %eax,DEST+8 */
         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,  /* mov    %ecx,DEST+12 */
         0x5b,                                /* pop    %ebx */
         0xc3,                                /* ret */
      };

      dfn->code = ALIGN_MALLOC( sizeof(float_code), 16 );
      memcpy (dfn->code, float_code, sizeof(float_code));
      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab);
      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)tnl->floatcolorptr);
      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)tnl->floatcolorptr+4);
      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)tnl->floatcolorptr+8);
      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)tnl->floatcolorptr+12);
      return dfn;
   }
}

/*
 * Build and cache the stub for the Color4ub entrypoint.
 *
 * Only the packed-colour layout is supported: the stub stores the low
 * byte of each of its four stack arguments to tnl->ubytecolorptr + 0..3.
 *
 * Returns 0, without allocating anything, when TNL_CP_VC_FRMT_PKCOLOR is
 * not set in 'key'.
 */
struct dynfn *tnl_makeX86Color4ub( TNLcontext *tnl, int key )
{
   /* XXX push/pop */
   static const unsigned char temp[] = {
      0x53,                                /* push   %ebx */
      0x8b, 0x44, 0x24, 0x08,              /* mov    0x8(%esp,1),%eax */
      0x8b, 0x54, 0x24, 0x0c,              /* mov    0xc(%esp,1),%edx */
      0x8b, 0x4c, 0x24, 0x10,              /* mov    0x10(%esp,1),%ecx */
      0x8b, 0x5c, 0x24, 0x14,              /* mov    0x14(%esp,1),%ebx */
      0xa2, 0, 0, 0, 0,                    /* mov    %al,DEST */
      0x88, 0x15, 0, 0, 0, 0,              /* mov    %dl,DEST+1 */
      0x88, 0x0d, 0, 0, 0, 0,              /* mov    %cl,DEST+2 */
      0x88, 0x1d, 0, 0, 0, 0,              /* mov    %bl,DEST+3 */
      0x5b,                                /* pop    %ebx */
      0xc3,                                /* ret */
   };
   struct dynfn *dfn;

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   /* No template exists for the float colour layout. */
   if (!(key & TNL_CP_VC_FRMT_PKCOLOR))
      return 0;

   dfn = MALLOC_STRUCT( dynfn );
   insert_at_head( &tnl->dfn_cache.Color4ub, dfn );
   dfn->key = key;

   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 18, 0x0, (int)tnl->ubytecolorptr);
   FIXUP(dfn->code, 24, 0x0, (int)tnl->ubytecolorptr+1);
   FIXUP(dfn->code, 30, 0x0, (int)tnl->ubytecolorptr+2);
   FIXUP(dfn->code, 36, 0x0, (int)tnl->ubytecolorptr+3);
   return dfn;
}


/*
 * Build and cache the stub for the Color3fv entrypoint.
 *
 * The stub copies three 32-bit words from the array addressed by its
 * argument to tnl->floatcolorptr.
 *
 * Returns 0 when 'key' has TNL_CP_VC_FRMT_PKCOLOR or
 * TNL_CP_VC_FRMT_FPALPHA set.
 */
struct dynfn *tnl_makeX86Color3fv( TNLcontext *tnl, int key )
{
   static const unsigned char temp[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0xba, 0, 0, 0, 0,                    /* mov    $DEST,%edx */
      0x8b, 0x08,                          /* mov    (%eax),%ecx */
      0x89, 0x0a,                          /* mov    %ecx,(%edx) */
      0x8b, 0x48, 0x04,                    /* mov    0x4(%eax),%ecx */
      0x89, 0x4a, 0x04,                    /* mov    %ecx,0x4(%edx) */
      0x8b, 0x48, 0x08,                    /* mov    0x8(%eax),%ecx */
      0x89, 0x4a, 0x08,                    /* mov    %ecx,0x8(%edx) */
      0xc3,                                /* ret */
   };
   struct dynfn *dfn;

   if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA))
      return 0;

   dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.Color3fv, dfn );
   dfn->key = key;
   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 5, 0x0, (int)tnl->floatcolorptr);
   return dfn;
}

/*
 * Build and cache the stub for the Color3f entrypoint.
 *
 * The stub stores its three 32-bit stack arguments to
 * tnl->floatcolorptr + 0, 4 and 8.
 *
 * Returns 0 when 'key' has TNL_CP_VC_FRMT_PKCOLOR or
 * TNL_CP_VC_FRMT_FPALPHA set.
 */
struct dynfn *tnl_makeX86Color3f( TNLcontext *tnl, int key )
{
   static const unsigned char temp[] = {
      0xba, 0x78, 0x56, 0x34, 0x12,        /* mov    $DEST,%edx */
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x89, 0x02,                          /* mov    %eax,(%edx) */
      0x8b, 0x44, 0x24, 0x08,              /* mov    0x8(%esp,1),%eax */
      0x89, 0x42, 0x04,                    /* mov    %eax,0x4(%edx) */
      0x8b, 0x44, 0x24, 0x0c,              /* mov    0xc(%esp,1),%eax */
      0x89, 0x42, 0x08,                    /* mov    %eax,0x8(%edx) */
      0xc3,                                /* ret */
   };
   struct dynfn *dfn;

   if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA))
      return 0;

   dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.Color3f, dfn );
   dfn->key = key;
   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 1, 0x12345678, (int)tnl->floatcolorptr);
   return dfn;
}



/*
 * Build and cache the stub for the TexCoord2fv entrypoint.
 *
 * The stub copies two 32-bit words from the array addressed by its
 * argument to tnl->texcoordptr[0].
 */
struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *tnl, int key )
{
   static const unsigned char temp[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0xba, 0x78, 0x56, 0x34, 0x12,        /* mov    $DEST,%edx */
      0x8b, 0x08,                          /* mov    (%eax),%ecx */
      0x8b, 0x40, 0x04,                    /* mov    0x4(%eax),%eax */
      0x89, 0x0a,                          /* mov    %ecx,(%edx) */
      0x89, 0x42, 0x04,                    /* mov    %eax,0x4(%edx) */
      0xc3,                                /* ret */
   };

   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.TexCoord2fv, dfn );
   dfn->key = key;
   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 5, 0x12345678, (int)tnl->texcoordptr[0]);
   return dfn;
}

/*
 * Build and cache the stub for the TexCoord2f entrypoint.
 *
 * The stub stores its two 32-bit stack arguments to
 * tnl->texcoordptr[0] + 0 and + 4.
 */
struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *tnl, int key )
{
   static const unsigned char temp[] = {
      0xba, 0x78, 0x56, 0x34, 0x12,        /* mov    $DEST,%edx */
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x8b, 0x4c, 0x24, 0x08,              /* mov    0x8(%esp,1),%ecx */
      0x89, 0x02,                          /* mov    %eax,(%edx) */
      0x89, 0x4a, 0x04,                    /* mov    %ecx,0x4(%edx) */
      0xc3,                                /* ret */
   };

   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.TexCoord2f, dfn );
   dfn->key = key;
   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 1, 0x12345678, (int)tnl->texcoordptr[0]);
   return dfn;
}

/*
 * Build and cache the stub for the MultiTexCoord2fvARB entrypoint.
 *
 * Both variants reduce the texture-unit argument to 0 or 1 by
 * subtracting 0x84c0 (the value of GL_TEXTURE0_ARB in the GL headers)
 * and masking with 1.
 *
 *  - When both TNL_CP_VC_FRMT_ST0 and TNL_CP_VC_FRMT_ST1 are set in
 *    'key', the unit index is scaled by 8 and used as a byte offset from
 *    tnl->texcoordptr[0].  NOTE(review): this presumably relies on the
 *    two texcoord slots being adjacent in memory -- confirm.
 *  - Otherwise the destination pointer is loaded at run time from the
 *    tnl->texcoordptr[] array.
 */
struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *tnl, int key )
{
   static const unsigned char direct_code[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x8b, 0x4c, 0x24, 0x08,              /* mov    0x8(%esp,1),%ecx */
      0x2d, 0xc0, 0x84, 0x00, 0x00,        /* sub    $0x84c0,%eax */
      0x83, 0xe0, 0x01,                    /* and    $0x1,%eax */
      0x8b, 0x11,                          /* mov    (%ecx),%edx */
      0xc1, 0xe0, 0x03,                    /* shl    $0x3,%eax */
      0x8b, 0x49, 0x04,                    /* mov    0x4(%ecx),%ecx */
      0x89, 0x90, 0, 0, 0, 0,              /* mov    %edx,DEST(%eax) */
      0x89, 0x88, 0, 0, 0, 0,              /* mov    %ecx,DEST+4(%eax) */
      0xc3,                                /* ret */
   };

   static const unsigned char indirect_code[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x8b, 0x4c, 0x24, 0x08,              /* mov    0x8(%esp,1),%ecx */
      0x2d, 0xc0, 0x84, 0x00, 0x00,        /* sub    $0x84c0,%eax */
      0x83, 0xe0, 0x01,                    /* and    $0x1,%eax */
      0x8b, 0x14, 0x85, 0, 0, 0, 0,        /* mov    DEST(,%eax,4),%edx */
      0x8b, 0x01,                          /* mov    (%ecx),%eax */
      0x89, 0x02,                          /* mov    %eax,(%edx) */
      0x8b, 0x41, 0x04,                    /* mov    0x4(%ecx),%eax */
      0x89, 0x42, 0x04,                    /* mov    %eax,0x4(%edx) */
      0xc3,                                /* ret */
   };

   const int both_units = TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1;
   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fvARB, dfn );
   dfn->key = key;

   if ((key & both_units) == both_units) {
      dfn->code = ALIGN_MALLOC( sizeof(direct_code), 16 );
      memcpy (dfn->code, direct_code, sizeof(direct_code));
      FIXUP(dfn->code, 26, 0x0, (int)tnl->texcoordptr[0]);
      FIXUP(dfn->code, 32, 0x0, (int)tnl->texcoordptr[0]+4);
   } else {
      dfn->code = ALIGN_MALLOC( sizeof(indirect_code), 16 );
      memcpy (dfn->code, indirect_code, sizeof(indirect_code));
      FIXUP(dfn->code, 19, 0x0, (int)tnl->texcoordptr);
   }
   return dfn;
}

/*
 * Build and cache the stub for the MultiTexCoord2fARB entrypoint.
 *
 * Same unit selection as the 2fv generator (subtract 0x84c0, mask with
 * 1), but the two coordinates arrive as stack arguments rather than
 * through a pointer.
 */
struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *tnl,
                                             int key )
{
   static const unsigned char direct_code[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x8b, 0x54, 0x24, 0x08,              /* mov    0x8(%esp,1),%edx */
      0x2d, 0xc0, 0x84, 0x00, 0x00,        /* sub    $0x84c0,%eax */
      0x8b, 0x4c, 0x24, 0x0c,              /* mov    0xc(%esp,1),%ecx */
      0x83, 0xe0, 0x01,                    /* and    $0x1,%eax */
      0xc1, 0xe0, 0x03,                    /* shl    $0x3,%eax */
      0x89, 0x90, 0, 0, 0, 0,              /* mov    %edx,DEST(%eax) */
      0x89, 0x88, 0, 0, 0, 0,              /* mov    %ecx,DEST+4(%eax) */
      0xc3,                                /* ret */
   };

   static const unsigned char indirect_code[] = {
      0x8b, 0x44, 0x24, 0x04,              /* mov    0x4(%esp,1),%eax */
      0x8b, 0x54, 0x24, 0x08,              /* mov    0x8(%esp,1),%edx */
      0x2d, 0xc0, 0x84, 0x00, 0x00,        /* sub    $0x84c0,%eax */
      0x8b, 0x4c, 0x24, 0x0c,              /* mov    0xc(%esp,1),%ecx */
      0x83, 0xe0, 0x01,                    /* and    $0x1,%eax */
      0x8b, 0x04, 0x85, 0, 0, 0, 0,        /* mov    DEST(,%eax,4),%eax */
      0x89, 0x10,                          /* mov    %edx,(%eax) */
      0x89, 0x48, 0x04,                    /* mov    %ecx,0x4(%eax) */
      0xc3,                                /* ret */
   };

   const int both_units = TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1;
   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN)
      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );

   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fARB, dfn );
   dfn->key = key;

   if ((key & both_units) == both_units) {
      dfn->code = ALIGN_MALLOC( sizeof(direct_code), 16 );
      memcpy (dfn->code, direct_code, sizeof(direct_code));
      FIXUP(dfn->code, 25, 0x0, (int)tnl->texcoordptr[0]);
      FIXUP(dfn->code, 31, 0x0, (int)tnl->texcoordptr[0]+4);
   }
   else {
      /* Note: this might get generated multiple times, even though the
       * actual emitted code is the same.
       */
      dfn->code = ALIGN_MALLOC( sizeof(indirect_code), 16 );
      memcpy (dfn->code, indirect_code, sizeof(indirect_code));
      FIXUP(dfn->code, 23, 0x0, (int)tnl->texcoordptr);
   }
   return dfn;
}


/*
 * Install the x86 code generators defined in this file into 'gen'.
 * Fields of 'gen' that are not assigned here are left untouched.
 */
void _tnl_InitX86Codegen( struct dfn_generators *gen )
{
   gen->Vertex3f = tnl_makeX86Vertex3f;
   gen->Vertex3fv = tnl_makeX86Vertex3fv;
   gen->Color4ub = tnl_makeX86Color4ub;   /* PKCOLOR only */
   gen->Color4ubv = tnl_makeX86Color4ubv; /* packed and float colour */
   gen->Normal3f = tnl_makeX86Normal3f;
   gen->Normal3fv = tnl_makeX86Normal3fv;
   gen->TexCoord2f = tnl_makeX86TexCoord2f;
   gen->TexCoord2fv = tnl_makeX86TexCoord2fv;
   gen->MultiTexCoord2fARB = tnl_makeX86MultiTexCoord2fARB;
   gen->MultiTexCoord2fvARB = tnl_makeX86MultiTexCoord2fvARB;
   gen->Color3f = tnl_makeX86Color3f;
   gen->Color3fv = tnl_makeX86Color3fv;

   /* Not done:
    *   Vertex2f, Vertex2fv,
    *   Color3ub, Color3ubv,
    *   Color4f, Color4fv,
    *   TexCoord1f, TexCoord1fv,
    *   MultiTexCoord1fARB, MultiTexCoord1fvARB
    */
}


#else

/* Build without x86 codegen: no generators are installed. */
void _tnl_InitX86Codegen( struct dfn_generators *gen )
{
   (void) gen;
}

#endif
