diff options
-rw-r--r-- | src/mesa/tnl/t_vtx_api.c | 675 | ||||
-rw-r--r-- | src/mesa/tnl/t_vtx_api.h | 234 | ||||
-rw-r--r-- | src/mesa/tnl/t_vtx_exec.c | 636 | ||||
-rw-r--r-- | src/mesa/tnl/t_vtx_sse.c | 93 | ||||
-rw-r--r-- | src/mesa/tnl/t_vtx_x86.c | 727 |
5 files changed, 2365 insertions, 0 deletions
diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c new file mode 100644 index 0000000000..8ae0569584 --- /dev/null +++ b/src/mesa/tnl/t_vtx_api.c @@ -0,0 +1,675 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ +#include "mtypes.h" +#include "colormac.h" +#include "simple_list.h" +#include "vtxfmt.h" + +#include "tnl_vtx_api.h" + +/* Fallback versions of all the entrypoints for situations where + * codegen isn't available. This is slowed significantly by all the + * gumph necessary to get to the tnl pointer. 
+ */ + + +/* MultiTexcoord ends up with both of these branches, unfortunately + * (it may get its own version of the macro after size-tracking is working). + * + * ATTRF: when the low four bits of ATTR are zero the N supplied components + * are written to the dma pointer, the remaining vertex_size - N words are + * copied from tnl->vertex, the dma pointer is advanced by one vertex and + * tnl->notify() is called once the counter reaches zero. For any other + * attribute only the slot addressed by tnl->attrptr[] is updated. + */ +#define ATTRF( ATTR, N, A, B, C, D ) \ +{ \ + GET_CURRENT_CONTEXT( ctx ); \ + TNLcontext *tnl = TNL_CONTEXT(ctx); \ + \ + if (((ATTR) & 0xf) == 0) { \ + int i; \ + \ + if (N>0) tnl->dmaptr[0].f = A; \ + if (N>1) tnl->dmaptr[1].f = B; \ + if (N>2) tnl->dmaptr[2].f = C; \ + if (N>3) tnl->dmaptr[3].f = D; \ + \ + for (i = N; i < tnl->vertex_size; i++) \ + tnl->dmaptr[i].i = tnl->vertex[i].i; \ + \ + tnl->dmaptr += tnl->vertex_size; \ + \ + if (--tnl->counter == 0) \ + tnl->notify(); \ + } \ + else { \ + GLfloat *dest = tnl->attrptr[(ATTR) & 0xf]; \ + if (N>0) dest[0] = A; \ + if (N>1) dest[1] = B; \ + if (N>2) dest[2] = C; \ + if (N>3) dest[3] = D; \ + } \ +} + +/* Each ATTRnF takes the attribute plus exactly n component values, + * matching how every entrypoint below invokes it. + */ +#define ATTR4F( ATTR, A, B, C, D ) ATTRF( ATTR, 4, A, B, C, D ) +#define ATTR3F( ATTR, A, B, C ) ATTRF( ATTR, 3, A, B, C, 1 ) +#define ATTR2F( ATTR, A, B ) ATTRF( ATTR, 2, A, B, 0, 1 ) +#define ATTR1F( ATTR, A ) ATTRF( ATTR, 1, A, 0, 0, 1 ) + +#define ATTR3UB( ATTR, A, B, C ) \ + ATTR3F( ATTR, \ + UBYTE_TO_FLOAT(A), \ + UBYTE_TO_FLOAT(B), \ + UBYTE_TO_FLOAT(C)) + + +#define ATTR4UB( ATTR, A, B, C, D ) \ + ATTR4F( ATTR, \ + UBYTE_TO_FLOAT(A), \ + UBYTE_TO_FLOAT(B), \ + UBYTE_TO_FLOAT(C), \ + UBYTE_TO_FLOAT(D)) + + +/* Vertex + */ +static void tnl_Vertex2f( GLfloat x, GLfloat y ) +{ + ATTR2F( VERT_ATTRIB_POS, x, y ); +} + +static void tnl_Vertex2fv( const GLfloat *v ) +{ + ATTR2F( VERT_ATTRIB_POS, v[0], v[1] ); +} + +static void tnl_Vertex3f( GLfloat x, GLfloat y, GLfloat z ) +{ + ATTR3F( VERT_ATTRIB_POS, x, y, z ); +} + +static void tnl_Vertex3fv( const GLfloat *v ) +{ + ATTR3F( VERT_ATTRIB_POS, v[0], v[1], v[2] ); +} + +static void tnl_Vertex4f( GLfloat x, GLfloat y, GLfloat z, GLfloat w ) +{ + ATTR4F( VERT_ATTRIB_POS, x, y, z, w ); +} + +static void tnl_Vertex4fv( const GLfloat *v ) +{ + ATTR4F( VERT_ATTRIB_POS, v[0], v[1], v[2], v[3] 
); +} + + +/* Color + */ +static void tnl_Color3ub( GLubyte r, GLubyte g, GLubyte b ) +{ + ATTR3UB( VERT_ATTRIB_COLOR0, r, g, b ); +} + +static void tnl_Color3ubv( const GLubyte *v ) +{ + ATTR3UB( VERT_ATTRIB_COLOR0, v[0], v[1], v[2] ); +} + +static void tnl_Color4ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a ) +{ + ATTR4UB( VERT_ATTRIB_COLOR0, r, g, b, a ); +} + +static void tnl_Color4ubv( const GLubyte *v ) +{ + ATTR4UB( VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3] ); +} + +static void tnl_Color3f( GLfloat r, GLfloat g, GLfloat b ) +{ + ATTR3F( VERT_ATTRIB_COLOR0, r, g, b ); +} + +static void tnl_Color3fv( const GLfloat *v ) +{ + ATTR3F( VERT_ATTRIB_COLOR0, v[0], v[1], v[2] ); +} + +static void tnl_Color4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a ) +{ + ATTR4F( VERT_ATTRIB_COLOR0, r, g, b, a ); +} + +static void tnl_Color4fv( const GLfloat *v ) +{ + ATTR4F( VERT_ATTRIB_COLOR0, v[0], v[1], v[2], v[3] ); +} + + +/* Secondary Color + */ +static void tnl_SecondaryColor3ubEXT( GLubyte r, GLubyte g, GLubyte b ) +{ + ATTR3UB( VERT_ATTRIB_COLOR1, r, g, b ); +} + +static void tnl_SecondaryColor3ubvEXT( const GLubyte *v ) +{ + ATTR3UB( VERT_ATTRIB_COLOR1, v[0], v[1], v[2] ); +} + +static void tnl_SecondaryColor3fEXT( GLfloat r, GLfloat g, GLfloat b ) +{ + ATTR3F( VERT_ATTRIB_COLOR1, r, g, b ); +} + +static void tnl_SecondaryColor3fvEXT( const GLfloat *v ) +{ + ATTR3F( VERT_ATTRIB_COLOR1, v[0], v[1], v[2] ); +} + + + +/* Fog Coord + */ +static void tnl_FogCoordfEXT( GLfloat f ) +{ + ATTR1F( VERT_ATTRIB_FOG, f ); +} + +static void tnl_FogCoordfvEXT( const GLfloat *v ) +{ + ATTR1F( VERT_ATTRIB_FOG, v[0] ); +} + + + +/* Normal + */ +static void tnl_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 ) +{ + ATTR3F( VERT_ATTRIB_NORMAL, n0, n1, n2 ); +} + +/* Vector form of tnl_Normal3f: must target the normal attribute, not + * the secondary colour. + */ +static void tnl_Normal3fv( const GLfloat *v ) +{ + ATTR3F( VERT_ATTRIB_NORMAL, v[0], v[1], v[2] ); +} + + +/* TexCoord + */ +static void tnl_TexCoord1f( GLfloat s ) +{ + ATTR1F( VERT_ATTRIB_TEX0, s ); +} + +static void 
tnl_TexCoord1fv( const GLfloat *v ) +{ + ATTR1F( VERT_ATTRIB_TEX0, v[0] ); +} + +static void tnl_TexCoord2f( GLfloat s, GLfloat t ) +{ + ATTR2F( VERT_ATTRIB_TEX0, s, t ); +} + +static void tnl_TexCoord2fv( const GLfloat *v ) +{ + ATTR2F( VERT_ATTRIB_TEX0, v[0], v[1] ); +} + +static void tnl_TexCoord3f( GLfloat s, GLfloat t, GLfloat r ) +{ + ATTR3F( VERT_ATTRIB_TEX0, s, t, r ); +} + +static void tnl_TexCoord3fv( const GLfloat *v ) +{ + ATTR3F( VERT_ATTRIB_TEX0, v[0], v[1], v[2] ); +} + +static void tnl_TexCoord4f( GLfloat s, GLfloat t, GLfloat r, GLfloat q ) +{ + ATTR4F( VERT_ATTRIB_TEX0, s, t, r, q ); +} + +static void tnl_TexCoord4fv( const GLfloat *v ) +{ + ATTR4F( VERT_ATTRIB_TEX0, v[0], v[1], v[2], v[3] ); +} + + +/* MultiTexcoord + */ +static void tnl_MultiTexCoord1fARB( GLenum target, GLfloat s ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR1F( attr, s ); +} + +static void tnl_MultiTexCoord1fvARB( GLenum target, const GLfloat *v ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR1F( attr, v[0] ); +} + +static void tnl_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR2F( attr, s, t ); +} + +static void tnl_MultiTexCoord2fvARB( GLenum target, const GLfloat *v ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR2F( attr, v[0], v[1] ); +} + +static void tnl_MultiTexCoord3fARB( GLenum target, GLfloat s, GLfloat t, + GLfloat r) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR3F( attr, s, t, r ); +} + +static void tnl_MultiTexCoord3fvARB( GLenum target, const GLfloat *v ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR3F( attr, v[0], v[1], v[2] ); +} + +static void tnl_MultiTexCoord4fARB( GLenum target, GLfloat s, GLfloat t, + GLfloat r, GLfloat q ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR4F( attr, s, t, r, q ); +} + +static void 
tnl_MultiTexCoord4fvARB( GLenum target, const GLfloat *v ) +{ + GLint attr = (target - GL_TEXTURE0_ARB) + VERT_ATTRIB_TEX0; + ATTR4F( attr, v[0], v[1], v[2], v[3] ); +} + + +/* NV_vertex_program: + * + * *** Need second dispatch layer above this for size tracking. One + * *** dispatch layer handles both VertexAttribute and MultiTexCoord + */ +static void tnl_VertexAttrib1fNV( GLuint index, GLfloat s ) +{ + ATTR1F( index, s ); +} + +static void tnl_VertexAttrib1fvNV( GLuint index, const GLfloat *v ) +{ + ATTR1F( index, v[0] ); +} + +static void tnl_VertexAttrib2fNV( GLuint index, GLfloat s, GLfloat t ) +{ + ATTR2F( index, s, t ); +} + +static void tnl_VertexAttrib2fvNV( GLuint index, const GLfloat *v ) +{ + ATTR2F( index, v[0], v[1] ); +} + +static void tnl_VertexAttrib3fNV( GLuint index, GLfloat s, GLfloat t, + GLfloat r ) +{ + ATTR3F( index, s, t, r ); +} + +static void tnl_VertexAttrib3fvNV( GLuint index, const GLfloat *v ) +{ + ATTR3F( index, v[0], v[1], v[2] ); +} + +static void tnl_VertexAttrib4fNV( GLuint index, GLfloat s, GLfloat t, + GLfloat r, GLfloat q ) +{ + ATTR4F( index, s, t, r, q ); +} + +static void tnl_VertexAttrib4fvNV( GLuint index, const GLfloat *v ) +{ + ATTR4F( index, v[0], v[1], v[2], v[3] ); +} + + +/* Miscellaneous: (These don't alias NV attributes, right?) + */ +static void tnl_EdgeFlag( GLboolean flag ) +{ + GET_TNL; + tnl->edgeflagptr[0] = flag; +} + +static void tnl_EdgeFlagv( const GLboolean *flag ) +{ + GET_TNL; + tnl->edgeflagptr[0] = *flag; +} + +static void tnl_Indexi( GLint idx ) +{ + GET_TNL; + tnl->indexptr[0] = idx; +} + +static void tnl_Indexiv( const GLint *idx ) +{ + GET_TNL; + tnl->indexptr[0] = *idx; +} + + + +/* Could use dispatch switching to build 'ranges' of eval vertices for + * each type, avoiding need for flags. (Make + * evalcoords/evalpoints/vertices/attr0 mutually exclusive) + * --> In which case, may as well use Vertex{12}f{v} here. 
+ */ +static void _tnl_EvalCoord1f( GLfloat u ) +{ + ATTR1F( VERT_ATTRIB_POS, u ); +} + +static void _tnl_EvalCoord1fv( const GLfloat *v ) +{ + ATTR1F( VERT_ATTRIB_POS, v[0] ); +} + +static void _tnl_EvalCoord2f( GLfloat u, GLfloat v ) +{ + ATTR2F( VERT_ATTRIB_POS, u, v ); +} + +static void _tnl_EvalCoord2fv( const GLfloat *v ) +{ + ATTR2F( VERT_ATTRIB_POS, v[0], v[1] ); +} + + +/* Materials: + * *** Treat as more vertex attributes + */ +static void _tnl_Materialfv( GLenum face, GLenum pname, + const GLfloat *params ) +{ + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (tnl->prim[0] != GL_POLYGON+1) { + VFMT_FALLBACK( __FUNCTION__ ); + glMaterialfv( face, pname, params ); + return; + } + _mesa_noop_Materialfv( face, pname, params ); +} + + + + +/* Codegen support + */ +static struct dynfn *lookup( struct dynfn *l, int key ) +{ + struct dynfn *f; + + foreach( f, l ) { + if (f->key == key) + return f; + } + + return 0; +} + +/* Can't use the loopback template for this: + */ +#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 ) \ +static void choose_##FN ARGS1 \ +{ \ + int key = tnl->vertex_format & (MASK|ACTIVE); \ + struct dynfn *dfn = lookup( &tnl->dfn_cache.FN, key ); \ + \ + if (dfn == 0) \ + dfn = tnl->codegen.FN( &vb, key ); \ + else if (MESA_VERBOSE & DEBUG_CODEGEN) \ + fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ ); \ + \ + if (dfn) \ + tnl->context->Exec->FN = (FNTYPE)(dfn->code); \ + else { \ + if (MESA_VERBOSE & DEBUG_CODEGEN) \ + fprintf(stderr, "%s -- generic version\n", __FUNCTION__ ); \ + tnl->context->Exec->FN = tnl_##FN; \ + } \ + \ + tnl->context->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \ + tnl->context->Exec->FN ARGS2; \ +} + + + +CHOOSE(Normal3f, p3f, 3, VERT_ATTRIB_NORMAL, + (GLfloat a,GLfloat b,GLfloat c), (a,b,c)) +CHOOSE(Normal3fv, pfv, 3, VERT_ATTRIB_NORMAL, + (const GLfloat *v), (v)) + +CHOOSE(Color4ub, p4ub, 4, VERT_ATTRIB_COLOR0, + (GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d)) 
+CHOOSE(Color4ubv, pubv, 4, VERT_ATTRIB_COLOR0, + (const GLubyte *v), (v)) +CHOOSE(Color3ub, p3ub, 3, VERT_ATTRIB_COLOR0, + (GLubyte a,GLubyte b, GLubyte c), (a,b,c)) +CHOOSE(Color3ubv, pubv, 3, VERT_ATTRIB_COLOR0, + (const GLubyte *v), (v)) + +CHOOSE(Color4f, p4f, 4, VERT_ATTRIB_COLOR0, + (GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d)) +CHOOSE(Color4fv, pfv, 4, VERT_ATTRIB_COLOR0, + (const GLfloat *v), (v)) +CHOOSE(Color3f, p3f, 3, VERT_ATTRIB_COLOR0, + (GLfloat a,GLfloat b, GLfloat c), (a,b,c)) +CHOOSE(Color3fv, pfv, 3, VERT_ATTRIB_COLOR0, + (const GLfloat *v), (v)) + + +CHOOSE(SecondaryColor3ubEXT, p3ub, VERT_ATTRIB_COLOR1, + (GLubyte a,GLubyte b, GLubyte c), (a,b,c)) +CHOOSE(SecondaryColor3ubvEXT, pubv, VERT_ATTRIB_COLOR1, + (const GLubyte *v), (v)) +CHOOSE(SecondaryColor3fEXT, p3f, VERT_ATTRIB_COLOR1, + (GLfloat a,GLfloat b, GLfloat c), (a,b,c)) +CHOOSE(SecondaryColor3fvEXT, pfv, VERT_ATTRIB_COLOR1, + (const GLfloat *v), (v)) + +CHOOSE(TexCoord2f, p2f, VERT_ATTRIB_TEX0, + (GLfloat a,GLfloat b), (a,b)) +CHOOSE(TexCoord2fv, pfv, VERT_ATTRIB_TEX0, + (const GLfloat *v), (v)) +CHOOSE(TexCoord1f, p1f, VERT_ATTRIB_TEX0, + (GLfloat a), (a)) +CHOOSE(TexCoord1fv, pfv, VERT_ATTRIB_TEX0, + (const GLfloat *v), (v)) + +CHOOSE(MultiTexCoord2fARB, pe2f, VERT_ATTRIB_TEX0, + (GLenum u,GLfloat a,GLfloat b), (u,a,b)) +CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL, + (GLenum u,const GLfloat *v), (u,v)) +CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL, + (GLenum u,GLfloat a), (u,a)) +CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL, + (GLenum u,const GLfloat *v), (u,v)) + +CHOOSE(Vertex3f, p3f, VERT_ATTRIB_POS, + (GLfloat a,GLfloat b,GLfloat c), (a,b,c)) +CHOOSE(Vertex3fv, pfv, VERT_ATTRIB_POS, + (const GLfloat *v), (v)) +CHOOSE(Vertex2f, p2f, VERT_ATTRIB_POS, + (GLfloat a,GLfloat b), (a,b)) +CHOOSE(Vertex2fv, pfv, VERT_ATTRIB_POS, + (const GLfloat *v), (v)) + + + + + +void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt ) +{ + 
vfmt->Color3f = choose_Color3f; + vfmt->Color3fv = choose_Color3fv; + vfmt->Color3ub = choose_Color3ub; + vfmt->Color3ubv = choose_Color3ubv; + vfmt->Color4f = choose_Color4f; + vfmt->Color4fv = choose_Color4fv; + vfmt->Color4ub = choose_Color4ub; + vfmt->Color4ubv = choose_Color4ubv; + vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT; + vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT; + vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT; + vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT; + vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB; + vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB; + vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB; + vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB; + vfmt->Normal3f = choose_Normal3f; + vfmt->Normal3fv = choose_Normal3fv; + vfmt->TexCoord1f = choose_TexCoord1f; + vfmt->TexCoord1fv = choose_TexCoord1fv; + vfmt->TexCoord2f = choose_TexCoord2f; + vfmt->TexCoord2fv = choose_TexCoord2fv; + vfmt->Vertex2f = choose_Vertex2f; + vfmt->Vertex2fv = choose_Vertex2fv; + vfmt->Vertex3f = choose_Vertex3f; + vfmt->Vertex3fv = choose_Vertex3fv; + vfmt->TexCoord3f = choose_TexCoord3f; + vfmt->TexCoord3fv = choose_TexCoord3fv; + vfmt->TexCoord4f = choose_TexCoord4f; + vfmt->TexCoord4fv = choose_TexCoord4fv; + vfmt->MultiTexCoord3fARB = choose_MultiTexCoord3fARB; + vfmt->MultiTexCoord3fvARB = choose_MultiTexCoord3fvARB; + vfmt->MultiTexCoord4fARB = choose_MultiTexCoord4fARB; + vfmt->MultiTexCoord4fvARB = choose_MultiTexCoord4fvARB; + vfmt->Vertex4f = choose_Vertex4f; + vfmt->Vertex4fv = choose_Vertex4fv; + vfmt->FogCoordfvEXT = choose_FogCoordfvEXT; + vfmt->FogCoordfEXT = choose_FogCoordfEXT; + vfmt->EdgeFlag = choose_EdgeFlag; + vfmt->EdgeFlagv = choose_EdgeFlagv; + vfmt->Indexi = choose_Indexi; + vfmt->Indexiv = choose_Indexiv; + vfmt->EvalCoord1f = choose_EvalCoord1f; + vfmt->EvalCoord1fv = choose_EvalCoord1fv; + vfmt->EvalCoord2f = choose_EvalCoord2f; + vfmt->EvalCoord2fv = 
choose_EvalCoord2fv; + vfmt->EvalMesh1 = choose_EvalMesh1; + vfmt->EvalMesh2 = choose_EvalMesh2; + vfmt->EvalPoint1 = choose_EvalPoint1; + vfmt->EvalPoint2 = choose_EvalPoint2; + + vfmt->Materialfv = _tnl_Materialfv; +} + + +static struct dynfn *codegen_noop( struct _vb *vb, int key ) +{ + (void) vb; (void) key; + return 0; +} + +void _tnl_InitCodegen( struct dfn_generators *gen ) +{ + gen->Vertex2f = codegen_noop; + gen->Vertex2fv = codegen_noop; + gen->Vertex3f = codegen_noop; + gen->Vertex3fv = codegen_noop; + gen->Vertex4f = codegen_noop; + gen->Vertex4fv = codegen_noop; + + gen->Attr1f = codegen_noop; + gen->Attr1fv = codegen_noop; + gen->Attr2f = codegen_noop; + gen->Attr2fv = codegen_noop; + gen->Attr3f = codegen_noop; + gen->Attr3fv = codegen_noop; + gen->Attr4f = codegen_noop; + gen->Attr4fv = codegen_noop; + gen->Attr3ub = codegen_noop; + gen->Attr3ubv = codegen_noop; + gen->Attr4ub = codegen_noop; + gen->Attr4ubv = codegen_noop; + + /* Probably need two versions of this, one for the front end + * (double dispatch), one for the back end (do the work) -- but + * will also need a second level of CHOOSE functions? + * -- Generate the dispatch layer using the existing templates somehow. + * -- Generate the backend and 2nd level choosers here. + * -- No need for a chooser on the top level. 
+ * -- Can aliasing help -- ie can NVAttr1f == Attr1f/Vertex2f at this level (index is known) + */ + gen->NVAttr1f = codegen_noop; + gen->NVAttr1fv = codegen_noop; + gen->NVAttr2f = codegen_noop; + gen->NVAttr2fv = codegen_noop; + gen->NVAttr3f = codegen_noop; + gen->NVAttr3fv = codegen_noop; + gen->NVAttr4f = codegen_noop; + gen->NVAttr4fv = codegen_noop; + + gen->MTAttr1f = codegen_noop; + gen->MTAttr1fv = codegen_noop; + gen->MTAttr2f = codegen_noop; + gen->MTAttr2fv = codegen_noop; + gen->MTAttr3f = codegen_noop; + gen->MTAttr3fv = codegen_noop; + gen->MTAttr4f = codegen_noop; + gen->MTAttr4fv = codegen_noop; + + if (!getenv("MESA_NO_CODEGEN")) { +#if defined(USE_X86_ASM) + _tnl_InitX86Codegen( gen ); +#endif + +#if defined(USE_SSE_ASM) + _tnl_InitSSECodegen( gen ); +#endif + +#if defined(USE_3DNOW_ASM) +#endif + +#if defined(USE_SPARC_ASM) +#endif + } +} diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h new file mode 100644 index 0000000000..6bfdbe8fe3 --- /dev/null +++ b/src/mesa/tnl/t_vtx_api.h @@ -0,0 +1,234 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#ifndef __RADEON_VTXFMT_H__ +#define __RADEON_VTXFMT_H__ + +#ifdef GLX_DIRECT_RENDERING + +#include "_tnl__context.h" + +extern void _tnl_UpdateVtxfmt( GLcontext *ctx ); +extern void _tnl_InitVtxfmt( GLcontext *ctx ); +extern void _tnl_InvalidateVtxfmt( GLcontext *ctx ); +extern void _tnl_DestroyVtxfmt( GLcontext *ctx ); + +typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat ); +typedef void (*p3f)( GLfloat, GLfloat, GLfloat ); +typedef void (*p2f)( GLfloat, GLfloat ); +typedef void (*p1f)( GLfloat ); +typedef void (*pe2f)( GLenum, GLfloat, GLfloat ); +typedef void (*pe1f)( GLenum, GLfloat ); +typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte ); +typedef void (*p3ub)( GLubyte, GLubyte, GLubyte ); +typedef void (*pfv)( const GLfloat * ); +typedef void (*pefv)( GLenum, const GLfloat * ); +typedef void (*pubv)( const GLubyte * ); + +/* Want to keep a cache of these around. Each is parameterized by + * only a single value which has only a small range. Only expect a + * few, so just rescan the list each time? 
+ */ +struct dynfn { + struct dynfn *next, *prev; + int key; + char *code; +}; + +struct dfn_lists { + struct dynfn Vertex2f; + struct dynfn Vertex2fv; + struct dynfn Vertex3f; + struct dynfn Vertex3fv; + struct dynfn Color4ub; + struct dynfn Color4ubv; + struct dynfn Color3ub; + struct dynfn Color3ubv; + struct dynfn Color4f; + struct dynfn Color4fv; + struct dynfn Color3f; + struct dynfn Color3fv; + struct dynfn SecondaryColor3ubEXT; + struct dynfn SecondaryColor3ubvEXT; + struct dynfn SecondaryColor3fEXT; + struct dynfn SecondaryColor3fvEXT; + struct dynfn Normal3f; + struct dynfn Normal3fv; + struct dynfn TexCoord2f; + struct dynfn TexCoord2fv; + struct dynfn TexCoord1f; + struct dynfn TexCoord1fv; + struct dynfn MultiTexCoord2fARB; + struct dynfn MultiTexCoord2fvARB; + struct dynfn MultiTexCoord1fARB; + struct dynfn MultiTexCoord1fvARB; +}; + +struct _vb; + +struct dfn_generators { + struct dynfn *(*Vertex2f)( struct _vb *, int ); + struct dynfn *(*Vertex2fv)( struct _vb *, int ); + struct dynfn *(*Vertex3f)( struct _vb *, int ); + struct dynfn *(*Vertex3fv)( struct _vb *, int ); + struct dynfn *(*Color4ub)( struct _vb *, int ); + struct dynfn *(*Color4ubv)( struct _vb *, int ); + struct dynfn *(*Color3ub)( struct _vb *, int ); + struct dynfn *(*Color3ubv)( struct _vb *, int ); + struct dynfn *(*Color4f)( struct _vb *, int ); + struct dynfn *(*Color4fv)( struct _vb *, int ); + struct dynfn *(*Color3f)( struct _vb *, int ); + struct dynfn *(*Color3fv)( struct _vb *, int ); + struct dynfn *(*SecondaryColor3ubEXT)( struct _vb *, int ); + struct dynfn *(*SecondaryColor3ubvEXT)( struct _vb *, int ); + struct dynfn *(*SecondaryColor3fEXT)( struct _vb *, int ); + struct dynfn *(*SecondaryColor3fvEXT)( struct _vb *, int ); + struct dynfn *(*Normal3f)( struct _vb *, int ); + struct dynfn *(*Normal3fv)( struct _vb *, int ); + struct dynfn *(*TexCoord2f)( struct _vb *, int ); + struct dynfn *(*TexCoord2fv)( struct _vb *, int ); + struct dynfn *(*TexCoord1f)( struct _vb 
*, int ); + struct dynfn *(*TexCoord1fv)( struct _vb *, int ); + struct dynfn *(*MultiTexCoord2fARB)( struct _vb *, int ); + struct dynfn *(*MultiTexCoord2fvARB)( struct _vb *, int ); + struct dynfn *(*MultiTexCoord1fARB)( struct _vb *, int ); + struct dynfn *(*MultiTexCoord1fvARB)( struct _vb *, int ); +}; + +struct prim { + GLuint start; + GLuint end; + GLuint prim; +}; + +/* Capacity of tnl_vbinfo.primlist. + */ +#define _TNL__MAX_PRIMS 64 + + + +struct tnl_vbinfo { + /* Keep these first: referenced from codegen templates: + */ + GLint counter; + GLint *dmaptr; + void (*notify)( void ); + union { float f; int i; GLubyte ub4[4]; } vertex[16*4]; + + GLfloat *attrptr[16]; + GLuint size[16]; + + GLenum *prim; /* &ctx->Driver.CurrentExecPrimitive */ + GLuint primflags; + + GLboolean installed; + GLboolean recheck; + + GLint vertex_size; + GLint initial_counter; + GLint nrverts; + GLuint vertex_format; + + GLuint installed_vertex_format; + + /* Sized by the macro defined above; RADEON_MAX_PRIMS is not defined here. */ + struct prim primlist[_TNL__MAX_PRIMS]; + int nrprims; + + struct dfn_lists dfn_cache; + struct dfn_generators codegen; + GLvertexformat vtxfmt; +}; + + +extern void _tnl_InitVtxfmtChoosers( GLvertexformat *vfmt ); + + +#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL ) \ +do { \ + int *icode = (int *)(CODE+OFFSET); \ + assert (*icode == CHECKVAL); \ + *icode = (int)NEWVAL; \ +} while (0) + + +/* Useful for figuring out the offsets: + */ +#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL ) \ +do { \ + while (*(int *)(CODE+OFFSET) != CHECKVAL) OFFSET++; \ + fprintf(stderr, "%s/%d CVAL %x OFFSET %d\n", __FUNCTION__, \ + __LINE__, CHECKVAL, OFFSET); \ + *(int *)(CODE+OFFSET) = (int)NEWVAL; \ + OFFSET += 4; \ +} while (0) + +/* + */ +void _tnl_InitCodegen( struct dfn_generators *gen ); +void _tnl_InitX86Codegen( struct dfn_generators *gen ); +void _tnl_InitSSECodegen( struct dfn_generators *gen ); + +void _tnl_copy_to_current( GLcontext *ctx ); + + +/* Defined in tnl_vtxfmt_c.c. 
+ */ +struct dynfn *tnl_makeX86Vertex2f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex2fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4ub( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4ubv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3ub( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3ubv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color4fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Color3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3ubEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3ubvEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3fEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86SecondaryColor3fvEXT( TNLcontext *, int ); +struct dynfn *tnl_makeX86Normal3f( TNLcontext *, int ); +struct dynfn *tnl_makeX86Normal3fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord1f( TNLcontext *, int ); +struct dynfn *tnl_makeX86TexCoord1fv( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord1fARB( TNLcontext *, int ); +struct dynfn *tnl_makeX86MultiTexCoord1fvARB( TNLcontext *, int ); + + +#endif +#endif diff --git a/src/mesa/tnl/t_vtx_exec.c b/src/mesa/tnl/t_vtx_exec.c new file mode 100644 index 0000000000..8470d6ab35 --- /dev/null +++ b/src/mesa/tnl/t_vtx_exec.c @@ -0,0 +1,636 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ +#include "api_noop.h" +#include "api_arrayelt.h" +#include "context.h" +#include "mem.h" +#include "mmath.h" +#include "mtypes.h" +#include "enums.h" +#include "glapi.h" +#include "colormac.h" +#include "light.h" +#include "state.h" +#include "vtxfmt.h" + +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_array_api.h" + +static void _tnl_FlushVertices( GLcontext *, GLuint ); + + +void tnl_copy_to_current( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint flag = tnl->vertex_format; + GLint i; + + assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT); + + for (i = 0 ; i < 16 ; i++) + if (flag & (1<<i)) + COPY_4FV( ctx->Current.Attrib[i], tnl->attribptr[i] ); + + if (flag & VERT_BIT_INDEX) + ctx->Current.Index = tnl->indexptr[0]; + + if (flag & VERT_BIT_EDGEFLAG) + ctx->Current.EdgeFlag = tnl->edgeflagptr[0]; + + if (flag & VERT_BIT_MATERIAL) { + _mesa_update_material( ctx, + IM->Material[IM->LastMaterial], + IM->MaterialOrMask ); + + tnl->Driver.NotifyMaterialChange( ctx ); + } + + + ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT; +} + +static GLboolean discreet_gl_prim[GL_POLYGON+1] = { + 1, /* 0 points */ + 1, /* 1 lines */ + 0, /* 2 line_strip */ + 0, /* 3 line_loop */ + 1, /* 4 tris */ + 0, /* 5 tri_fan */ + 0, /* 6 tri_strip */ + 1, /* 7 quads */ + 0, /* 8 quadstrip */ + 0, /* 9 poly */ +}; + +/* Optimize the primitive list: ONLY FOR EXECUTE ATM + */ +static void optimize_prims( TNLcontext *tnl ) +{ + int i, j; + + if (tnl->nrprims <= 1) + return; + + for (j = 0, i = 1 ; i < tnl->nrprims; i++) { + int pj = tnl->primlist[j].prim & 0xf; + int pi = tnl->primlist[i].prim & 0xf; + + if (pj == pi && discreet_gl_prim[pj] && + tnl->primlist[i].start == tnl->primlist[j].end) { + tnl->primlist[j].end = tnl->primlist[i].end; + } + else { + j++; + if (j != i) tnl->primlist[j] = 
tnl->primlist[i]; + } + } + + tnl->nrprims = j+1; +} + + +/* Bind vertex buffer pointers, run pipeline: + */ +static void flush_prims( TNLcontext *tnl ) +{ + int i,j; + + tnl->dma.current.ptr = tnl->dma.current.start += + (tnl->initial_counter - tnl->counter) * tnl->vertex_size * 4; + + tnl->tcl.vertex_format = tnl->vertex_format; + tnl->tcl.aos_components[0] = &tmp; + tnl->tcl.nr_aos_components = 1; + tnl->dma.flush = 0; + + tnl->Driver.RunPipeline( ... ); + + tnl->nrprims = 0; +} + + +static void start_prim( TNLcontext *tnl, GLuint mode ) +{ + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s %d\n", __FUNCTION__, + tnl->initial_counter - tnl->counter); + + tnl->primlist[tnl->nrprims].start = tnl->initial_counter - tnl->counter; + tnl->primlist[tnl->nrprims].prim = mode; +} + +static void note_last_prim( TNLcontext *tnl, GLuint flags ) +{ + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s %d\n", __FUNCTION__, + tnl->initial_counter - tnl->counter); + + if (tnl->prim[0] != GL_POLYGON+1) { + tnl->primlist[tnl->nrprims].prim |= flags; + tnl->primlist[tnl->nrprims].end = tnl->initial_counter - tnl->counter; + + if (++tnl->nrprims == TNL_MAX_PRIMS) + flush_prims( tnl ); + } +} + + +static void copy_vertex( TNLcontext *tnl, GLuint n, GLfloat *dst ) +{ + GLuint i; + GLfloat *src = (GLfloat *)(tnl->dma.current.address + + tnl->dma.current.ptr + + (tnl->primlist[tnl->nrprims].start + n) * + tnl->vertex_size * 4); + + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "copy_vertex %d\n", + tnl->primlist[tnl->nrprims].start + n); + + for (i = 0 ; i < tnl->vertex_size; i++) { + dst[i] = src[i]; + } +} + +/* NOTE: This actually reads the copied vertices back from uncached + * memory. Could also use the counter/notify mechanism to populate + * tmp on the fly as vertices are generated. 
+ */ +/* Copy into tmp the trailing vertices of the currently open primitive + * that have to be replayed at the start of the next buffer, and return + * how many were copied. nr is the number of vertices emitted since the + * primitive's recorded start. + */ +static GLuint copy_wrapped_verts( TNLcontext *tnl, GLfloat (*tmp)[15] ) +{ + GLuint ovf, i; + GLuint nr = (tnl->initial_counter - tnl->counter) - tnl->primlist[tnl->nrprims].start; + + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr); + + switch( tnl->prim[0] ) + { + case GL_POINTS: + return 0; + case GL_LINES: + ovf = nr&1; + for (i = 0 ; i < ovf ; i++) + copy_vertex( tnl, nr-ovf+i, tmp[i] ); + return i; + case GL_TRIANGLES: + ovf = nr%3; + for (i = 0 ; i < ovf ; i++) + copy_vertex( tnl, nr-ovf+i, tmp[i] ); + return i; + case GL_QUADS: + ovf = nr&3; + for (i = 0 ; i < ovf ; i++) + copy_vertex( tnl, nr-ovf+i, tmp[i] ); + return i; + case GL_LINE_STRIP: + if (nr == 0) + return 0; + copy_vertex( tnl, nr-1, tmp[0] ); + return 1; + case GL_LINE_LOOP: + case GL_TRIANGLE_FAN: + case GL_POLYGON: + if (nr == 0) + return 0; + else if (nr == 1) { + copy_vertex( tnl, 0, tmp[0] ); + return 1; + } else { + copy_vertex( tnl, 0, tmp[0] ); + copy_vertex( tnl, nr-1, tmp[1] ); + return 2; + } + case GL_TRIANGLE_STRIP: + /* nr is unsigned, so nr-1 would wrap to a huge value when nr == 0; + * carry over the last two vertices, or all of them if fewer. + */ + ovf = MIN2( nr, 2 ); + for (i = 0 ; i < ovf ; i++) + copy_vertex( tnl, nr-ovf+i, tmp[i] ); + return i; + case GL_QUAD_STRIP: + /* Carry the last pair, plus the odd trailing vertex of a + * half-finished pair; never more than the 3 rows tmp provides. + */ + switch (nr) { + case 0: ovf = 0; break; + case 1: ovf = 1; break; + default: ovf = 2 + (nr&1); break; + } + for (i = 0 ; i < ovf ; i++) + copy_vertex( tnl, nr-ovf+i, tmp[i] ); + return i; + default: + assert(0); + return 0; + } +} + + + +/* Extend for vertex-format changes on wrap: + */ +static void wrap_buffer( void ) +{ + /* Called through tnl->notify() with no arguments, so look the + * context up the same way the ATTRF entrypoints do rather than + * initialising tnl from itself. + */ + GET_CURRENT_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLfloat tmp[3][15]; + GLuint i, nrverts; + + if (MESA_VERBOSE & (DEBUG_VFMT|DEBUG_PRIMS)) + fprintf(stderr, "%s %d\n", __FUNCTION__, + tnl->initial_counter - tnl->counter); + + /* Don't deal with parity. 
*** WONT WORK FOR COMPILE + */ + if ((((tnl->initial_counter - tnl->counter) - + tnl->primlist[tnl->nrprims].start) & 1)) { + tnl->counter++; + tnl->initial_counter++; + return; + } + + /* Copy vertices out of dma: + */ + nrverts = copy_dma_verts( tnl, tmp ); + + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%d vertices to copy\n", nrverts); + + + /* Finish the prim at this point: + */ + note_last_prim( tnl, 0 ); + flush_prims( tnl ); + + /* Reset counter, dmaptr + */ + tnl->dmaptr = (int *)(tnl->dma.current.ptr + tnl->dma.current.address); + tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) / + (tnl->vertex_size * 4); + tnl->counter--; + tnl->initial_counter = tnl->counter; + tnl->notify = wrap_buffer; + + tnl->dma.flush = flush_prims; + start_prim( tnl, tnl->prim[0] ); + + + /* Reemit saved vertices + * *** POSSIBLY IN NEW FORMAT + * --> Can't always extend at end of vertex? + */ + for (i = 0 ; i < nrverts; i++) { + if (MESA_VERBOSE & DEBUG_VERTS) { + int j; + fprintf(stderr, "re-emit vertex %d to %p\n", i, tnl->dmaptr); + if (MESA_VERBOSE & DEBUG_VERBOSE) + for (j = 0 ; j < tnl->vertex_size; j++) + fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]); + } + + memcpy( tnl->dmaptr, tmp[i], tnl->vertex_size * 4 ); + tnl->dmaptr += tnl->vertex_size; + tnl->counter--; + } +} + + + +/* Always follow data, don't try to predict what's necessary. + */ +static GLboolean check_vtx_fmt( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + + if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) + ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT ); + + + TNL_NEWPRIM(tnl); + tnl->vertex_format = VERT_BIT_POS; + tnl->prim = &ctx->Driver.CurrentExecPrimitive; + + + /* Currently allow the full 4 components per attrib. Can use the + * mechanism from radeon driver color handling to reduce this (and + * also to store ubyte colors where these are incoming). This + * won't work for compile mode. 
+ * + * Only adding components when they are first received eliminates + * the need for displaylist fixup, as there are no 'empty' slots + * at the start of buffers. + */ + for (i = 0 ; i < 16 ; i++) { + if (ind & (1<<i)) { + tnl->attribptr[i] = &tnl->vertex[tnl->vertex_size].f; + tnl->vertex_size += 4; + tnl->attribptr[i][0] = ctx->Current.Attrib[i][0]; + tnl->attribptr[i][1] = ctx->Current.Attrib[i][1]; + tnl->attribptr[i][2] = ctx->Current.Attrib[i][2]; + tnl->attribptr[i][3] = ctx->Current.Attrib[i][3]; + } + else + tnl->attribptr[i] = ctx->Current.Attrib[i]; + } + + /* Edgeflag, Index: + */ + for (i = 16 ; i < 18 ; i++) + ; + + /* Materials: + */ + for (i = 18 ; i < 28 ; i++) + ; + + /* Eval: + */ + for (i = 28 ; i < 29 ; i++) + ; + + + if (tnl->installed_vertex_format != tnl->vertex_format) { + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "reinstall on vertex_format change\n"); + _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt ); + tnl->installed_vertex_format = tnl->vertex_format; + } + + return GL_TRUE; +} + + +void _tnl_InvalidateVtxfmt( GLcontext *ctx ) +{ + tnl->recheck = GL_TRUE; + tnl->fell_back = GL_FALSE; +} + + + + +static void _tnl_ValidateVtxfmt( GLcontext *ctx ) +{ + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (ctx->Driver.NeedFlush) + ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush ); + + tnl->recheck = GL_FALSE; + + if (check_vtx_fmt( ctx )) { + if (!tnl->installed) { + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "reinstall (new install)\n"); + + _mesa_install_exec_vtxfmt( ctx, &tnl->vtxfmt ); + ctx->Driver.FlushVertices = _tnl_FlushVertices; + tnl->installed = GL_TRUE; + } + else + fprintf(stderr, "%s: already installed", __FUNCTION__); + } + else { + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s: failed\n", __FUNCTION__); + + if (tnl->installed) { + if (tnl->tnl->dma.flush) + tnl->tnl->dma.flush( tnl->tnl ); + _tnl_wakeup_exec( ctx ); + tnl->installed = GL_FALSE; + } + } +} + + + + + +/* 
Begin/End + */ +static void _tnl_Begin( GLenum mode ) +{ + GLcontext *ctx = tnl->context; + TNLcontext *tnl = tnl->tnl; + + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (mode > GL_POLYGON) { + _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" ); + return; + } + + if (tnl->prim[0] != GL_POLYGON+1) { + _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" ); + return; + } + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (tnl->recheck) + _tnl_ValidateVtxfmt( ctx ); + + if (tnl->dma.flush && tnl->counter < 12) { + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__); + flush_prims( tnl ); + } + + if (!tnl->dma.flush) { + if (tnl->dma.current.ptr + 12*tnl->vertex_size*4 > + tnl->dma.current.end) { + TNL_NEWPRIM( tnl ); + _tnl_RefillCurrentDmaRegion( tnl ); + } + + tnl->dmaptr = (int *)(tnl->dma.current.address + tnl->dma.current.ptr); + tnl->counter = (tnl->dma.current.end - tnl->dma.current.ptr) / + (tnl->vertex_size * 4); + tnl->counter--; + tnl->initial_counter = tnl->counter; + tnl->notify = wrap_buffer; + tnl->dma.flush = flush_prims; + tnl->context->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + } + + + tnl->prim[0] = mode; + start_prim( tnl, mode | PRIM_BEGIN ); +} + + + + + +static void _tnl_End( void ) +{ + TNLcontext *tnl = tnl->tnl; + GLcontext *ctx = tnl->context; + + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (tnl->prim[0] == GL_POLYGON+1) { + _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" ); + return; + } + + note_last_prim( tnl, PRIM_END ); + tnl->prim[0] = GL_POLYGON+1; +} + + +static void _tnl_FlushVertices( GLcontext *ctx, GLuint flags ) +{ + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "%s\n", __FUNCTION__); + + assert(tnl->installed); + + if (flags & FLUSH_UPDATE_CURRENT) { + _tnl_copy_to_current( ctx ); + if (MESA_VERBOSE & DEBUG_VFMT) + fprintf(stderr, "reinstall on update_current\n"); + _mesa_install_exec_vtxfmt( ctx, 
&tnl->vtxfmt ); + ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT; + } + + if (flags & FLUSH_STORED_VERTICES) { + TNLcontext *tnl = TNL_CONTEXT( ctx ); + assert (tnl->dma.flush == 0 || + tnl->dma.flush == flush_prims); + if (tnl->dma.flush == flush_prims) + flush_prims( TNL_CONTEXT( ctx ) ); + ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES; + } +} + + + +/* At this point, don't expect very many versions of each function to + * be generated, so not concerned about freeing them? + */ + + +static void _tnl_InitVtxfmt( GLcontext *ctx ) +{ + GLvertexformat *vfmt = &(tnl->vtxfmt); + + MEMSET( vfmt, 0, sizeof(GLvertexformat) ); + + /* Hook in chooser functions for codegen, etc: + */ + _tnl_InitVtxfmtChoosers( vfmt ); + + /* Handled fully in supported states, but no codegen: + */ + vfmt->ArrayElement = _ae_loopback_array_elt; /* generic helper */ + vfmt->Rectf = _mesa_noop_Rectf; /* generic helper */ + vfmt->Begin = _tnl_Begin; + vfmt->End = _tnl_End; + + tnl->context = ctx; + tnl->tnl = TNL_CONTEXT(ctx); + tnl->prim = &ctx->Driver.CurrentExecPrimitive; + tnl->primflags = 0; + + make_empty_list( &tnl->dfn_cache.Vertex2f ); + make_empty_list( &tnl->dfn_cache.Vertex2fv ); + make_empty_list( &tnl->dfn_cache.Vertex3f ); + make_empty_list( &tnl->dfn_cache.Vertex3fv ); + make_empty_list( &tnl->dfn_cache.Color4ub ); + make_empty_list( &tnl->dfn_cache.Color4ubv ); + make_empty_list( &tnl->dfn_cache.Color3ub ); + make_empty_list( &tnl->dfn_cache.Color3ubv ); + make_empty_list( &tnl->dfn_cache.Color4f ); + make_empty_list( &tnl->dfn_cache.Color4fv ); + make_empty_list( &tnl->dfn_cache.Color3f ); + make_empty_list( &tnl->dfn_cache.Color3fv ); + make_empty_list( &tnl->dfn_cache.SecondaryColor3fEXT ); + make_empty_list( &tnl->dfn_cache.SecondaryColor3fvEXT ); + make_empty_list( &tnl->dfn_cache.SecondaryColor3ubEXT ); + make_empty_list( &tnl->dfn_cache.SecondaryColor3ubvEXT ); + make_empty_list( &tnl->dfn_cache.Normal3f ); + make_empty_list( &tnl->dfn_cache.Normal3fv ); + 
make_empty_list( &tnl->dfn_cache.TexCoord2f ); + make_empty_list( &tnl->dfn_cache.TexCoord2fv ); + make_empty_list( &tnl->dfn_cache.TexCoord1f ); + make_empty_list( &tnl->dfn_cache.TexCoord1fv ); + make_empty_list( &tnl->dfn_cache.MultiTexCoord2fARB ); + make_empty_list( &tnl->dfn_cache.MultiTexCoord2fvARB ); + make_empty_list( &tnl->dfn_cache.MultiTexCoord1fARB ); + make_empty_list( &tnl->dfn_cache.MultiTexCoord1fvARB ); + + _tnl_InitCodegen( &tnl->codegen ); +} + +static void free_funcs( struct dynfn *l ) +{ + struct dynfn *f, *tmp; + foreach_s (f, tmp, l) { + remove_from_list( f ); + ALIGN_FREE( f->code ); + FREE( f ); + } +} + + +static void _tnl_DestroyVtxfmt( GLcontext *ctx ) +{ + count_funcs(); + free_funcs( &tnl->dfn_cache.Vertex2f ); + free_funcs( &tnl->dfn_cache.Vertex2fv ); + free_funcs( &tnl->dfn_cache.Vertex3f ); + free_funcs( &tnl->dfn_cache.Vertex3fv ); + free_funcs( &tnl->dfn_cache.Color4ub ); + free_funcs( &tnl->dfn_cache.Color4ubv ); + free_funcs( &tnl->dfn_cache.Color3ub ); + free_funcs( &tnl->dfn_cache.Color3ubv ); + free_funcs( &tnl->dfn_cache.Color4f ); + free_funcs( &tnl->dfn_cache.Color4fv ); + free_funcs( &tnl->dfn_cache.Color3f ); + free_funcs( &tnl->dfn_cache.Color3fv ); + free_funcs( &tnl->dfn_cache.SecondaryColor3ubEXT ); + free_funcs( &tnl->dfn_cache.SecondaryColor3ubvEXT ); + free_funcs( &tnl->dfn_cache.SecondaryColor3fEXT ); + free_funcs( &tnl->dfn_cache.SecondaryColor3fvEXT ); + free_funcs( &tnl->dfn_cache.Normal3f ); + free_funcs( &tnl->dfn_cache.Normal3fv ); + free_funcs( &tnl->dfn_cache.TexCoord2f ); + free_funcs( &tnl->dfn_cache.TexCoord2fv ); + free_funcs( &tnl->dfn_cache.TexCoord1f ); + free_funcs( &tnl->dfn_cache.TexCoord1fv ); + free_funcs( &tnl->dfn_cache.MultiTexCoord2fARB ); + free_funcs( &tnl->dfn_cache.MultiTexCoord2fvARB ); + free_funcs( &tnl->dfn_cache.MultiTexCoord1fARB ); + free_funcs( &tnl->dfn_cache.MultiTexCoord1fvARB ); +} + diff --git a/src/mesa/tnl/t_vtx_sse.c b/src/mesa/tnl/t_vtx_sse.c new file mode 100644 
index 0000000000..240d6cf8b9 --- /dev/null +++ b/src/mesa/tnl/t_vtx_sse.c @@ -0,0 +1,93 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <stdio.h> +#include <assert.h> +#include "mem.h" +#include "simple_list.h" +#include "t_vtx_api.h" + +#if defined(USE_SSE_ASM) + +/* Build specialized versions of the immediate calls on the fly for + * the current state. ???P4 SSE2 versions??? + */ + + +static struct dynfn *makeSSENormal3fv( struct _vb *vb, int key ) +{ + /* Requires P4 (sse2?) 
+ */ + static unsigned char temp[] = { + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0xba, 0x78, 0x56, 0x34, 0x12, /* mov $0x12345678,%edx */ + 0xf3, 0x0f, 0x7e, 0x00, /* movq (%eax),%xmm0 */ + 0x66, 0x0f, 0x6e, 0x48, 0x08, /* movd 0x8(%eax),%xmm1 */ + 0x66, 0x0f, 0xd6, 0x42, 0x0c, /* movq %xmm0,0xc(%edx) */ + 0x66, 0x0f, 0x7e, 0x4a, 0x14, /* movd %xmm1,0x14(%edx) */ + 0xc3, /* ret */ + }; + + + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + insert_at_head( &vb->dfn_cache.Normal3fv, dfn ); + dfn->key = key; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 5, 0x0, (int)vb->normalptr); + return dfn; +} + +void _tnl_InitSSECodegen( struct dfn_generators *gen ) +{ + /* Need to: + * - check kernel sse support + * - check p4/sse2 + */ + (void) makeSSENormal3fv; +} + + +#else + +void _tnl_InitSSECodegen( struct dfn_generators *gen ) +{ + (void) gen; +} + +#endif + + + + diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c new file mode 100644 index 0000000000..4713a325bf --- /dev/null +++ b/src/mesa/tnl/t_vtx_x86.c @@ -0,0 +1,727 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <stdio.h> +#include <assert.h> +#include "mem.h" +#include "mmath.h" +#include "simple_list.h" +#include "tnl_vtxfmt.h" + +#if defined(USE_X86_ASM) + +/* Build specialized versions of the immediate calls on the fly for + * the current state. Generic x86 versions. + */ + +struct dynfn *tnl_makeX86Vertex3f( TNLcontext *tnl, int key ) +{ + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + + if (RADEON_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + switch (tnl->vertex_size) { + case 4: { + static char temp[] = { + 0x8b, 0x0d, 0,0,0,0, /* mov DMAPTR,%ecx */ + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0x8b, 0x54, 0x24, 0x08, /* mov 0x8(%esp,1),%edx */ + 0x89, 0x01, /* mov %eax,(%ecx) */ + 0x89, 0x51, 0x04, /* mov %edx,0x4(%ecx) */ + 0x8b, 0x44, 0x24, 0x0c, /* mov 0xc(%esp,1),%eax */ + 0x8b, 0x15, 0,0,0,0, /* mov VERTEX[3],%edx */ + 0x89, 0x41, 0x08, /* mov %eax,0x8(%ecx) */ + 0x89, 0x51, 0x0c, /* mov %edx,0xc(%ecx) */ + 0xa1, 0, 0, 0, 0, /* mov COUNTER,%eax */ + 0x83, 0xc1, 0x10, /* add $0x10,%ecx */ + 0x48, /* dec %eax */ + 0x89, 0x0d, 0,0,0,0, /* mov %ecx,DMAPTR */ + 0xa3, 0, 0, 0, 0, /* mov %eax,COUNTER */ + 0x74, 0x01, /* je +1 */ + 0xc3, /* ret */ + 0xff, 0x25, 0,0,0,0 /* jmp *NOTIFY */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + 
FIXUP(dfn->code, 2, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 25, 0x0, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 36, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 46, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 51, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 60, 0x0, (int)&tnl->notify); + break; + } + case 6: { + static char temp[] = { + 0x57, /* push %edi */ + 0x8b, 0x3d, 0, 0, 0, 0, /* mov DMAPTR,%edi */ + 0x8b, 0x44, 0x24, 0x8, /* mov 0x8(%esp,1),%eax */ + 0x8b, 0x54, 0x24, 0xc, /* mov 0xc(%esp,1),%edx */ + 0x8b, 0x4c, 0x24, 0x10, /* mov 0x10(%esp,1),%ecx */ + 0x89, 0x07, /* mov %eax,(%edi) */ + 0x89, 0x57, 0x04, /* mov %edx,0x4(%edi) */ + 0x89, 0x4f, 0x08, /* mov %ecx,0x8(%edi) */ + 0xa1, 0, 0, 0, 0, /* mov VERTEX[3],%eax */ + 0x8b, 0x15, 0, 0, 0, 0, /* mov VERTEX[4],%edx */ + 0x8b, 0x0d, 0, 0, 0, 0, /* mov VERTEX[5],%ecx */ + 0x89, 0x47, 0x0c, /* mov %eax,0xc(%edi) */ + 0x89, 0x57, 0x10, /* mov %edx,0x10(%edi) */ + 0x89, 0x4f, 0x14, /* mov %ecx,0x14(%edi) */ + 0x83, 0xc7, 0x18, /* add $0x18,%edi */ + 0xa1, 0, 0, 0, 0, /* mov COUNTER,%eax */ + 0x89, 0x3d, 0, 0, 0, 0, /* mov %edi,DMAPTR */ + 0x48, /* dec %eax */ + 0x5f, /* pop %edi */ + 0xa3, 0, 0, 0, 0, /* mov %eax,COUNTER */ + 0x74, 0x01, /* je +1 */ + 0xc3, /* ret */ + 0xff, 0x25, 0,0,0,0, /* jmp *NOTIFY */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 3, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 28, 0x0, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 34, 0x0, (int)&tnl->vertex[4]); + FIXUP(dfn->code, 40, 0x0, (int)&tnl->vertex[5]); + FIXUP(dfn->code, 57, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 63, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 70, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 79, 0x0, (int)&tnl->notify); + break; + } + default: { + /* Repz convenient as it's possible to emit code for any size + * vertex with little tweaking. Might as well read vertsize + * though, and have only one of these. 
+ */ + static char temp[] = { + 0x57, /* push %edi */ + 0x56, /* push %esi */ + 0xbe, 0, 0, 0, 0, /* mov $VERTEX+3,%esi */ + 0x8b, 0x3d, 0, 0, 0, 0, /* mov DMAPTR,%edi */ + 0x8b, 0x44, 0x24, 0x0c, /* mov 0x0c(%esp,1),%eax */ + 0x8b, 0x54, 0x24, 0x10, /* mov 0x10(%esp,1),%edx */ + 0x8b, 0x4c, 0x24, 0x14, /* mov 0x14(%esp,1),%ecx */ + 0x89, 0x07, /* mov %eax,(%edi) */ + 0x89, 0x57, 0x04, /* mov %edx,0x4(%edi) */ + 0x89, 0x4f, 0x08, /* mov %ecx,0x8(%edi) */ + 0x83, 0xc7, 0x0c, /* add $0xc,%edi */ + 0xb9, 0, 0, 0, 0, /* mov $VERTSIZE-3,%ecx */ + 0xf3, 0xa5, /* repz movsl %ds:(%esi),%es:(%edi)*/ + 0xa1, 0, 0, 0, 0, /* mov COUNTER,%eax */ + 0x89, 0x3d, 0, 0, 0, 0, /* mov %edi,DMAPTR */ + 0x48, /* dec %eax */ + 0xa3, 0, 0, 0, 0, /* mov %eax,COUNTER */ + 0x5e, /* pop %esi */ + 0x5f, /* pop %edi */ + 0x74, 0x01, /* je +1 */ + 0xc3, /* ret */ + 0xff, 0x25, 0, 0, 0, 0 /* jmp NOTIFY */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 3, 0x0, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 9, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 37, 0x0, tnl->vertex_size-3); + FIXUP(dfn->code, 44, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 50, 0x0, (int)&tnl->dmaptr); + FIXUP(dfn->code, 56, 0x0, (int)&tnl->counter); + FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify); + break; + } + } + + insert_at_head( &tnl->dfn_cache.Vertex3f, dfn ); + dfn->key = key; + return dfn; +} + + + +struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *tnl, int key ) +{ + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + switch (tnl->vertex_size) { + case 6: { + static char temp[] = { + 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */ + 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */ + 0x8b, 0x11, /* mov (%ecx),%edx */ + 0x89, 0x10, /* mov %edx,(%eax) */ + 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */ + 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */ + 0x89, 0x50, 0x04, /* mov 
%edx,0x4(%eax) */ + 0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */ + 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ + 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ + 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */ + 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */ + 0x8b, 0x15, 0x24, 0, 0, 0, /* mov 0x24,%edx */ + 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */ + 0x83, 0xc0, 0x18, /* add $0x18,%eax */ + 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */ + 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */ + 0x48, /* dec %eax */ + 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */ + 0x74, 0x01, /* je 2a4 <.f11> */ + 0xc3, /* ret */ + 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); + FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); + FIXUP(dfn->code, 45, 0x00000024, (int)&tnl->vertex[5]); + FIXUP(dfn->code, 56, 0x00000000, (int)&tnl->dmaptr); + FIXUP(dfn->code, 61, 0x00000004, (int)&tnl->counter); + FIXUP(dfn->code, 67, 0x00000004, (int)&tnl->counter); + FIXUP(dfn->code, 76, 0x00000008, (int)&tnl->notify); + break; + } + + + case 8: { + static char temp[] = { + 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */ + 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */ + 0x8b, 0x11, /* mov (%ecx),%edx */ + 0x89, 0x10, /* mov %edx,(%eax) */ + 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */ + 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */ + 0x89, 0x50, 0x04, /* mov %edx,0x4(%eax) */ + 0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */ + 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ + 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ + 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */ + 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */ + 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ + 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ + 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */ + 0x89, 0x48, 0x18, /* mov %ecx,0x18(%eax) */ + 0x8b, 0x15, 0x24, 0, 0, 
0, /* mov 0x24,%edx */ + 0x89, 0x50, 0x1c, /* mov %edx,0x1c(%eax) */ + 0x83, 0xc0, 0x20, /* add $0x20,%eax */ + 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */ + 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */ + 0x48, /* dec %eax */ + 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */ + 0x74, 0x01, /* je 2a4 <.f11> */ + 0xc3, /* ret */ + 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); + FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); + FIXUP(dfn->code, 45, 0x0000001c, (int)&tnl->vertex[5]); + FIXUP(dfn->code, 51, 0x00000020, (int)&tnl->vertex[6]); + FIXUP(dfn->code, 63, 0x00000024, (int)&tnl->vertex[7]); + FIXUP(dfn->code, 74, 0x00000000, (int)&tnl->dmaptr); + FIXUP(dfn->code, 79, 0x00000004, (int)&tnl->counter); + FIXUP(dfn->code, 85, 0x00000004, (int)&tnl->counter); + FIXUP(dfn->code, 94, 0x00000008, (int)&tnl->notify); + break; + } + + + + default: { + /* Repz convenient as it's possible to emit code for any size + * vertex with little tweaking. Might as well read vertsize + * though, and have only one of these. 
+ */ + static char temp[] = { + 0x8b, 0x54, 0x24, 0x04, /* mov 0x4(%esp,1),%edx */ + 0x57, /* push %edi */ + 0x56, /* push %esi */ + 0x8b, 0x3d, 1,1,1,1, /* mov DMAPTR,%edi */ + 0x8b, 0x02, /* mov (%edx),%eax */ + 0x8b, 0x4a, 0x04, /* mov 0x4(%edx),%ecx */ + 0x8b, 0x72, 0x08, /* mov 0x8(%edx),%esi */ + 0x89, 0x07, /* mov %eax,(%edi) */ + 0x89, 0x4f, 0x04, /* mov %ecx,0x4(%edi) */ + 0x89, 0x77, 0x08, /* mov %esi,0x8(%edi) */ + 0x83, 0xc7, 0x0c, /* add $0xc,%edi */ + 0xb9, 0x06, 0x00, 0x00, 0x00, /* mov $VERTSIZE-3,%ecx */ + 0xbe, 0x58, 0x00, 0x00, 0x00, /* mov $VERTEX[3],%esi */ + 0xf3, 0xa5, /* repz movsl %ds:(%esi),%es:(%edi)*/ + 0x89, 0x3d, 1, 1, 1, 1, /* mov %edi,DMAPTR */ + 0xa1, 2, 2, 2, 2, /* mov COUNTER,%eax */ + 0x5e, /* pop %esi */ + 0x5f, /* pop %edi */ + 0x48, /* dec %eax */ + 0xa3, 2, 2, 2, 2, /* mov %eax,COUNTER */ + 0x74, 0x01, /* je +1 */ + 0xc3, /* ret */ + 0xff, 0x25, 0, 0, 0, 0 /* jmp NOTIFY */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 8, 0x01010101, (int)&tnl->dmaptr); + FIXUP(dfn->code, 32, 0x00000006, tnl->vertex_size-3); + FIXUP(dfn->code, 37, 0x00000058, (int)&tnl->vertex[3]); + FIXUP(dfn->code, 45, 0x01010101, (int)&tnl->dmaptr); + FIXUP(dfn->code, 50, 0x02020202, (int)&tnl->counter); + FIXUP(dfn->code, 58, 0x02020202, (int)&tnl->counter); + FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify); + break; + } + } + + insert_at_head( &tnl->dfn_cache.Vertex3fv, dfn ); + dfn->key = key; + return dfn; +} + + +struct dynfn *tnl_makeX86Normal3fv( TNLcontext *tnl, int key ) +{ + static char temp[] = { + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0xba, 0, 0, 0, 0, /* mov $DEST,%edx */ + 0x8b, 0x08, /* mov (%eax),%ecx */ + 0x89, 0x0a, /* mov %ecx,(%edx) */ + 0x8b, 0x48, 0x04, /* mov 0x4(%eax),%ecx */ + 0x89, 0x4a, 0x04, /* mov %ecx,0x4(%edx) */ + 0x8b, 0x48, 0x08, /* mov 0x8(%eax),%ecx */ + 0x89, 0x4a, 0x08, /* mov %ecx,0x8(%edx) */ + 0xc3, /* ret */ + }; + + struct dynfn *dfn 
= MALLOC_STRUCT( dynfn ); + + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + insert_at_head( &tnl->dfn_cache.Normal3fv, dfn ); + dfn->key = key; + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 5, 0x0, (int)tnl->normalptr); + return dfn; +} + +struct dynfn *tnl_makeX86Normal3f( TNLcontext *tnl, int key ) +{ + static char temp[] = { + 0xba, 0x78, 0x56, 0x34, 0x12, /* mov $DEST,%edx */ + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0x89, 0x02, /* mov %eax,(%edx) */ + 0x8b, 0x44, 0x24, 0x08, /* mov 0x8(%esp,1),%eax */ + 0x89, 0x42, 0x04, /* mov %eax,0x4(%edx) */ + 0x8b, 0x44, 0x24, 0x0c, /* mov 0xc(%esp,1),%eax */ + 0x89, 0x42, 0x08, /* mov %eax,0x8(%edx) */ + 0xc3, /* ret */ + }; + + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + insert_at_head( &tnl->dfn_cache.Normal3f, dfn ); + dfn->key = key; + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 1, 0x12345678, (int)tnl->normalptr); + return dfn; +} + +struct dynfn *tnl_makeX86Color4ubv( TNLcontext *tnl, int key ) +{ + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + insert_at_head( &tnl->dfn_cache.Color4ubv, dfn ); + dfn->key = key; + + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + if (key & TNL_CP_VC_FRMT_PKCOLOR) { + static char temp[] = { + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0xba, 0x78, 0x56, 0x34, 0x12, /* mov $DEST,%edx */ + 0x8b, 0x00, /* mov (%eax),%eax */ + 0x89, 0x02, /* mov %eax,(%edx) */ + 0xc3, /* ret */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 5, 0x12345678, (int)tnl->ubytecolorptr); + return dfn; + } + else { + static char temp[] = { + 0x53, /* push %ebx */ + 0xba, 0x00, 0x00, 0x00, 0x00, /* mov $0x0,%edx */ + 0x31, 
0xc0, /* xor %eax,%eax */ + 0x31, 0xc9, /* xor %ecx,%ecx */ + 0x8b, 0x5c, 0x24, 0x08, /* mov 0x8(%esp,1), %ebx */ + 0x8b, 0x1b, /* mov (%ebx), %ebx */ + 0x88, 0xd8, /* mov %bl, %al */ + 0x88, 0xf9, /* mov %bh, %cl */ + 0x8b, 0x04, 0x82, /* mov (%edx,%eax,4),%eax */ + 0x8b, 0x0c, 0x8a, /* mov (%edx,%ecx,4),%ecx */ + 0xa3, 0xaf, 0xbe, 0xad, 0xde, /* mov %eax,0xdeadbeaf */ + 0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde, /* mov %ecx,0xdeadbeaf */ + 0x31, 0xc0, /* xor %eax,%eax */ + 0x31, 0xc9, /* xor %ecx,%ecx */ + 0xc1, 0xeb, 0x10, /* shr $0x10, %ebx */ + 0x88, 0xd8, /* mov %bl, %al */ + 0x88, 0xf9, /* mov %bh, %cl */ + 0x8b, 0x04, 0x82, /* mov (%edx,%eax,4),%eax */ + 0x8b, 0x0c, 0x8a, /* mov (%edx,%ecx,4),%ecx */ + 0xa3, 0xaf, 0xbe, 0xad, 0xde, /* mov %eax,0xdeadbeaf */ + 0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde, /* mov %ecx,0xdeadbeaf */ + 0x5b, /* pop %ebx */ + 0xc3, /* ret */ + }; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab); + FIXUP(dfn->code, 27, 0xdeadbeaf, (int)tnl->floatcolorptr); + FIXUP(dfn->code, 33, 0xdeadbeaf, (int)tnl->floatcolorptr+4); + FIXUP(dfn->code, 55, 0xdeadbeaf, (int)tnl->floatcolorptr+8); + FIXUP(dfn->code, 61, 0xdeadbeaf, (int)tnl->floatcolorptr+12); + return dfn; + } +} + +struct dynfn *tnl_makeX86Color4ub( TNLcontext *tnl, int key ) +{ + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + if (key & TNL_CP_VC_FRMT_PKCOLOR) { + /* XXX push/pop */ + static char temp[] = { + 0x53, /* push %ebx */ + 0x8b, 0x44, 0x24, 0x08, /* mov 0x8(%esp,1),%eax */ + 0x8b, 0x54, 0x24, 0x0c, /* mov 0xc(%esp,1),%edx */ + 0x8b, 0x4c, 0x24, 0x10, /* mov 0x10(%esp,1),%ecx */ + 0x8b, 0x5c, 0x24, 0x14, /* mov 0x14(%esp,1),%ebx */ + 0xa2, 0, 0, 0, 0, /* mov %al,DEST */ + 0x88, 0x15, 0, 0, 0, 0, /* mov %dl,DEST+1 */ + 0x88, 0x0d, 0, 0, 0, 0, /* mov %cl,DEST+2 */ + 0x88, 0x1d, 0, 0, 0, 0, /* mov %bl,DEST+3 */ + 0x5b, /* pop %ebx */ + 
0xc3, /* ret */ + }; + + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + insert_at_head( &tnl->dfn_cache.Color4ub, dfn ); + dfn->key = key; + + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 18, 0x0, (int)tnl->ubytecolorptr); + FIXUP(dfn->code, 24, 0x0, (int)tnl->ubytecolorptr+1); + FIXUP(dfn->code, 30, 0x0, (int)tnl->ubytecolorptr+2); + FIXUP(dfn->code, 36, 0x0, (int)tnl->ubytecolorptr+3); + return dfn; + } + else + return 0; +} + + +struct dynfn *tnl_makeX86Color3fv( TNLcontext *tnl, int key ) +{ + if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA)) + return 0; + else + { + static char temp[] = { + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0xba, 0, 0, 0, 0, /* mov $DEST,%edx */ + 0x8b, 0x08, /* mov (%eax),%ecx */ + 0x89, 0x0a, /* mov %ecx,(%edx) */ + 0x8b, 0x48, 0x04, /* mov 0x4(%eax),%ecx */ + 0x89, 0x4a, 0x04, /* mov %ecx,0x4(%edx) */ + 0x8b, 0x48, 0x08, /* mov 0x8(%eax),%ecx */ + 0x89, 0x4a, 0x08, /* mov %ecx,0x8(%edx) */ + 0xc3, /* ret */ + }; + + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + + if (TNL_DEBUG & DEBUG_CODEGEN) + fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key ); + + insert_at_head( &tnl->dfn_cache.Color3fv, dfn ); + dfn->key = key; + dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); + memcpy (dfn->code, temp, sizeof(temp)); + FIXUP(dfn->code, 5, 0x0, (int)tnl->floatcolorptr); + return dfn; + } +} + +struct dynfn *tnl_makeX86Color3f( TNLcontext *tnl, int key ) +{ + if (key & (TNL_CP_VC_FRMT_PKCOLOR|TNL_CP_VC_FRMT_FPALPHA)) + return 0; + else + { + static char temp[] = { + 0xba, 0x78, 0x56, 0x34, 0x12, /* mov $DEST,%edx */ + 0x8b, 0x44, 0x24, 0x04, /* mov 0x4(%esp,1),%eax */ + 0x89, 0x02, /* mov %eax,(%edx) */ + 0x8b, 0x44, 0x24, 0x08, /* mov 0x8(%esp,1),%eax */ + 0x89, 0x42, 0x04, /* mov %eax,0x4(%edx) */ + 0x8b, 0x44, 0x24, 0x0c, /* mov 0xc(%esp,1),%eax */ + 0x89, 0x42, 0x08, /* mov %eax,0x8(%edx) */ + 0xc3, /* ret */ + }; + + struct dynfn *dfn = MALLOC_STRUCT( dynfn ); + 
+      if (TNL_DEBUG & DEBUG_CODEGEN)
+         fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+      insert_at_head( &tnl->dfn_cache.Color3f, dfn );
+      dfn->key = key;
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      FIXUP(dfn->code, 1, 0x12345678, (int)tnl->floatcolorptr);
+      return dfn;
+   }
+}
+
+
+/* Build the generated-code record for the TexCoord2fv entrypoint.
+ *
+ * A new dynfn is allocated, linked at the head of
+ * tnl->dfn_cache.TexCoord2fv and tagged with key.  The byte template
+ * below is copied into a buffer obtained from
+ * ALIGN_MALLOC( sizeof(temp), 16 ), and the 0x12345678 operand of the
+ * "mov $DEST,%edx" instruction is handed to FIXUP together with the value
+ * of tnl->texcoordptr[0].  Going by the instruction annotations, the
+ * emitted routine reads its pointer argument from 4(%esp) and copies two
+ * 32-bit words from it to DEST.
+ *
+ * Returns the new dynfn.
+ *
+ * NOTE(review): FIXUP is defined outside this view; presumably it checks
+ * that the bytes at the given offset hold the placeholder and overwrites
+ * them with the new value -- confirm.
+ * NOTE(review): the results of MALLOC_STRUCT and ALIGN_MALLOC are used
+ * without a NULL check.
+ * NOTE(review): the (int) cast assumes a pointer fits in an int, and
+ * initialisers such as 0x8b do not fit in a char where char is signed.
+ */
+struct dynfn *tnl_makeX86TexCoord2fv( TNLcontext *tnl, int key )
+{
+   static char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0xba, 0x78, 0x56, 0x34, 0x12,    /* mov    $DEST,%edx */
+      0x8b, 0x08,                      /* mov    (%eax),%ecx */
+      0x8b, 0x40, 0x04,                /* mov    0x4(%eax),%eax */
+      0x89, 0x0a,                      /* mov    %ecx,(%edx) */
+      0x89, 0x42, 0x04,                /* mov    %eax,0x4(%edx) */
+      0xc3,                            /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.TexCoord2fv, dfn );
+   dfn->key = key;
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   /* Offset 5: the 4-byte first instruction plus the 0xba opcode byte. */
+   FIXUP(dfn->code, 5, 0x12345678, (int)tnl->texcoordptr[0]);
+   return dfn;
+}
+/* Build the generated-code record for the TexCoord2f entrypoint.
+ *
+ * Same construction as the pointer-argument variant, but linked into
+ * tnl->dfn_cache.TexCoord2f.  Going by the instruction annotations, the
+ * emitted routine takes its two 32-bit arguments from 4(%esp) and 8(%esp)
+ * and stores them at DEST and DEST+4, where DEST is the value
+ * tnl->texcoordptr[0] has when this generator runs.
+ *
+ * Returns the new dynfn.
+ *
+ * NOTE(review): allocation results are not checked, and the (int) cast
+ * assumes a pointer fits in an int.
+ */
+struct dynfn *tnl_makeX86TexCoord2f( TNLcontext *tnl, int key )
+{
+   static char temp[] = {
+      0xba, 0x78, 0x56, 0x34, 0x12,    /* mov    $DEST,%edx */
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          /* mov    0x8(%esp,1),%ecx */
+      0x89, 0x02,                      /* mov    %eax,(%edx) */
+      0x89, 0x4a, 0x04,                /* mov    %ecx,0x4(%edx) */
+      0xc3,                            /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.TexCoord2f, dfn );
+   dfn->key = key;
+   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+   memcpy (dfn->code, temp, sizeof(temp));
+   /* Offset 1: the operand directly follows the leading 0xba opcode byte. */
+   FIXUP(dfn->code, 1, 0x12345678, (int)tnl->texcoordptr[0]);
+   return dfn;
+}
+/* Build the generated-code record for the MultiTexCoord2fvARB entrypoint.
+ *
+ * Both templates load the first argument from 4(%esp), subtract 0x84c0
+ * (presumably GL_TEXTURE0_ARB -- confirm) and mask with 1, so only two
+ * texture units are distinguished.  The second argument, at 8(%esp), is
+ * the pointer to the two source words.
+ *
+ *  - temp is chosen when key has both TNL_CP_VC_FRMT_ST0 and
+ *    TNL_CP_VC_FRMT_ST1 set.  It scales the unit index by 8 and stores
+ *    relative to the fixed address tnl->texcoordptr[0].
+ *    NOTE(review): this relies on the second unit's coordinates sitting
+ *    8 bytes after the first unit's -- confirm against the vertex layout.
+ *  - temp2 is chosen otherwise.  It is patched with the address of the
+ *    tnl->texcoordptr array itself and, at run time, loads the 4-byte
+ *    entry selected by the unit index and stores through that pointer.
+ *
+ * The new dynfn is linked at the head of
+ * tnl->dfn_cache.MultiTexCoord2fvARB, tagged with key and returned.
+ *
+ * NOTE(review): allocation results are not checked, and the (int) casts
+ * assume a pointer fits in an int.
+ */
+struct dynfn *tnl_makeX86MultiTexCoord2fvARB( TNLcontext *tnl, int key )
+{
+   static char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          /* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    /* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,                /* and    $0x1,%eax */
+      0x8b, 0x11,                      /* mov    (%ecx),%edx */
+      0xc1, 0xe0, 0x03,                /* shl    $0x3,%eax */
+      0x8b, 0x49, 0x04,                /* mov    0x4(%ecx),%ecx */
+      0x89, 0x90, 0, 0, 0, 0,/* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,/* mov    %ecx,DEST+4(%eax) -- matches the +4 fixup at offset 32 */
+      0xc3,                            /* ret */
+   };
+
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0x8b, 0x4c, 0x24, 0x08,          /* mov    0x8(%esp,1),%ecx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    /* sub    $0x84c0,%eax */
+      0x83, 0xe0, 0x01,                /* and    $0x1,%eax */
+      0x8b, 0x14, 0x85, 0, 0, 0, 0,    /* mov    DEST(,%eax,4),%edx */
+      0x8b, 0x01,                      /* mov    (%ecx),%eax */
+      0x89, 0x02,                      /* mov    %eax,(%edx) */
+      0x8b, 0x41, 0x04,                /* mov    0x4(%ecx),%eax */
+      0x89, 0x42, 0x04,                /* mov    %eax,0x4(%edx) */
+      0xc3,                            /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fvARB, dfn );
+   dfn->key = key;
+
+   if ((key & (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) ==
+       (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) {
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* Offsets 26 and 32 are the 32-bit displacements of the two stores
+       * (24 bytes of preceding code plus a 2-byte opcode/modrm each). */
+      FIXUP(dfn->code, 26, 0x0, (int)tnl->texcoordptr[0]);
+      FIXUP(dfn->code, 32, 0x0, (int)tnl->texcoordptr[0]+4);
+   } else {
+      dfn->code = ALIGN_MALLOC( sizeof(temp2), 16 );
+      memcpy (dfn->code, temp2, sizeof(temp2));
+      /* Offset 19 is the displacement of the indexed load; it receives the
+       * address of the pointer array, not one of its entries. */
+      FIXUP(dfn->code, 19, 0x0, (int)tnl->texcoordptr);
+   }
+   return dfn;
+}
+/* Build the generated-code record for the MultiTexCoord2fARB entrypoint.
+ *
+ * Both templates load the first argument from 4(%esp), subtract 0x84c0
+ * (presumably GL_TEXTURE0_ARB -- confirm) and mask with 1, so only two
+ * texture units are distinguished.  The two 32-bit values to store are
+ * taken from 8(%esp) and 0xc(%esp).
+ *
+ *  - temp is chosen when key has both TNL_CP_VC_FRMT_ST0 and
+ *    TNL_CP_VC_FRMT_ST1 set.  It scales the unit index by 8 and stores
+ *    relative to the fixed address tnl->texcoordptr[0].
+ *    NOTE(review): this relies on the second unit's coordinates sitting
+ *    8 bytes after the first unit's -- confirm against the vertex layout.
+ *  - temp2 is chosen otherwise.  It is patched with the address of the
+ *    tnl->texcoordptr array itself and, at run time, loads the 4-byte
+ *    entry selected by the unit index and stores through that pointer.
+ *
+ * The new dynfn is linked at the head of
+ * tnl->dfn_cache.MultiTexCoord2fARB, tagged with key and returned.
+ *
+ * NOTE(review): allocation results are not checked, and the (int) casts
+ * assume a pointer fits in an int.
+ */
+struct dynfn *tnl_makeX86MultiTexCoord2fARB( TNLcontext *tnl,
+                                             int key )
+{
+   static char temp[] = {
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          /* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    /* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          /* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,                /* and    $0x1,%eax */
+      0xc1, 0xe0, 0x03,                /* shl    $0x3,%eax */
+      0x89, 0x90, 0, 0, 0, 0,          /* mov    %edx,DEST(%eax) */
+      0x89, 0x88, 0, 0, 0, 0,          /* mov    %ecx,DEST+4(%eax) -- matches the +4 fixup at offset 31 */
+      0xc3,                            /* ret */
+   };
+
+   static char temp2[] = {
+      0x8b, 0x44, 0x24, 0x04,          /* mov    0x4(%esp,1),%eax */
+      0x8b, 0x54, 0x24, 0x08,          /* mov    0x8(%esp,1),%edx */
+      0x2d, 0xc0, 0x84, 0x00, 0x00,    /* sub    $0x84c0,%eax */
+      0x8b, 0x4c, 0x24, 0x0c,          /* mov    0xc(%esp,1),%ecx */
+      0x83, 0xe0, 0x01,                /* and    $0x1,%eax */
+      0x8b, 0x04, 0x85, 0, 0, 0, 0,    /* mov    DEST(,%eax,4),%eax */
+      0x89, 0x10,                      /* mov    %edx,(%eax) */
+      0x89, 0x48, 0x04,                /* mov    %ecx,0x4(%eax) */
+      0xc3,                            /* ret */
+   };
+
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (TNL_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   insert_at_head( &tnl->dfn_cache.MultiTexCoord2fARB, dfn );
+   dfn->key = key;
+
+   if ((key & (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) ==
+       (TNL_CP_VC_FRMT_ST0|TNL_CP_VC_FRMT_ST1)) {
+      dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
+      memcpy (dfn->code, temp, sizeof(temp));
+      /* Offsets 25 and 31 are the 32-bit displacements of the two stores
+       * (23 bytes of preceding code plus a 2-byte opcode/modrm each). */
+      FIXUP(dfn->code, 25, 0x0, (int)tnl->texcoordptr[0]);
+      FIXUP(dfn->code, 31, 0x0, (int)tnl->texcoordptr[0]+4);
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      dfn->code = ALIGN_MALLOC( sizeof(temp2), 16 );
+      memcpy (dfn->code, temp2, sizeof(temp2));
+      /* Offset 23 is the displacement of the indexed load; it receives the
+       * address of the pointer array, not one of its entries. */
+      FIXUP(dfn->code, 23, 0x0, (int)tnl->texcoordptr);
+   }
+   return dfn;
+}
+
+/* Install the x86 generators into *gen.
+ *
+ * Only the twelve members assigned below are written.  The assignments
+ * listed under "Not done" are commented out, so this function leaves
+ * those members as it found them.  The Vertex, Color and Normal
+ * generators named here are defined outside this view.
+ */
+void _tnl_InitX86Codegen( struct dfn_generators *gen )
+{
+   gen->Vertex3f = tnl_makeX86Vertex3f;
+   gen->Vertex3fv = tnl_makeX86Vertex3fv;
+   gen->Color4ub = tnl_makeX86Color4ub; /* PKCOLOR only */
+   gen->Color4ubv = tnl_makeX86Color4ubv; /* PKCOLOR only */
+   gen->Normal3f = tnl_makeX86Normal3f;
+   gen->Normal3fv = tnl_makeX86Normal3fv;
+   gen->TexCoord2f = tnl_makeX86TexCoord2f;
+   gen->TexCoord2fv = tnl_makeX86TexCoord2fv;
+   gen->MultiTexCoord2fARB = tnl_makeX86MultiTexCoord2fARB;
+   gen->MultiTexCoord2fvARB = tnl_makeX86MultiTexCoord2fvARB;
+   gen->Color3f = tnl_makeX86Color3f;
+   gen->Color3fv = tnl_makeX86Color3fv;
+
+   /* Not done:
+    */
+/*     gen->Vertex2f = tnl_makeX86Vertex2f; */
+/*     gen->Vertex2fv = tnl_makeX86Vertex2fv; */
+/*     gen->Color3ub = tnl_makeX86Color3ub; */
+/*     gen->Color3ubv = tnl_makeX86Color3ubv; */
+/*     gen->Color4f = tnl_makeX86Color4f; */
+/*     gen->Color4fv = tnl_makeX86Color4fv; */
+/*     gen->TexCoord1f = tnl_makeX86TexCoord1f; */
+/*     gen->TexCoord1fv = tnl_makeX86TexCoord1fv; */
+/*     gen->MultiTexCoord1fARB = tnl_makeX86MultiTexCoord1fARB; */
+/*     gen->MultiTexCoord1fvARB = tnl_makeX86MultiTexCoord1fvARB; */
+}
+
+
+#else
+/* Variant compiled on the #else side of a preprocessor conditional whose
+ * opening #if is outside this view: installs nothing and only references
+ * gen through a (void) cast.
+ */
+void _tnl_InitX86Codegen( struct dfn_generators *gen )
+{
+   (void) gen;
+}
+
+#endif |