From c8100a02d28c8a424f69723778abebd950914bc6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 29 Mar 2004 11:05:02 +0000 Subject: First round of codegen for t_vtx_api.c -- ie the Begin/Vertex/End code. Enable with env var: MESA_CODEGEN=t. --- src/mesa/sources | 5 +- src/mesa/tnl/t_context.c | 4 + src/mesa/tnl/t_context.h | 28 +- src/mesa/tnl/t_vtx_api.c | 749 ++++++++++++++----------------------------- src/mesa/tnl/t_vtx_api.h | 26 ++ src/mesa/tnl/t_vtx_x86_gcc.S | 327 +++++++++++-------- 6 files changed, 496 insertions(+), 643 deletions(-) (limited to 'src') diff --git a/src/mesa/sources b/src/mesa/sources index aeb8559fa2..21727aa574 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -132,6 +132,8 @@ TNL_SOURCES = \ tnl/t_vb_vertex.c \ tnl/t_vertex.c \ tnl/t_vtx_api.c \ + tnl/t_vtx_generic.c \ + tnl/t_vtx_x86.c \ tnl/t_vtx_eval.c \ tnl/t_vtx_exec.c @@ -159,7 +161,8 @@ X86_SOURCES = \ x86/sse_xform2.S \ x86/sse_xform3.S \ x86/sse_xform4.S \ - x86/sse_normal.S + x86/sse_normal.S \ + tnl/t_vtx_x86_gcc.S SPARC_SOURCES = \ sparc/clip.S \ diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 2c529afd79..330c19d649 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -121,6 +121,10 @@ _tnl_CreateContext( GLcontext *ctx ) tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; tnl->Driver.NotifyMaterialChange = _mesa_validate_all_lighting_tables; + + if (getenv("MESA_CODEGEN")) + tnl->AllowCodegen = GL_TRUE; + return GL_TRUE; } diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index 1293db3bf3..ec5675faf9 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -248,10 +248,28 @@ struct tnl_copied_vtx { #define VERT_BUFFER_SIZE 2048 /* 8kbytes */ -#define ERROR_ATTRIB _TNL_ATTRIB_MAX /* error path for t_vtx_api.c */ typedef void (*attrfv_func)( const GLfloat * ); +struct dynfn { + struct dynfn *next, *prev; + int key; + char *code; +}; + +struct dynfn_lists { + struct dynfn Vertex[4]; + 
struct dynfn Attribute[4]; +}; + +struct dynfn_generators { + struct dynfn *(*Vertex[4])( GLcontext *ctx, int key ); + struct dynfn *(*Attribute[4])( GLcontext *ctx, int key ); +}; + +#define _TNL_MAX_ATTR_CODEGEN 16 + + /* The assembly of vertices in immediate mode is separated from * display list compilation. This allows a simpler immediate mode * treatment and a display list compiler better suited to @@ -269,7 +287,12 @@ struct tnl_vtx { GLfloat *current[_TNL_ATTRIB_MAX]; /* points into ctx->Current, etc */ GLuint counter, initial_counter; struct tnl_copied_vtx copied; - attrfv_func tabfv[_TNL_ATTRIB_MAX+1][4]; /* +1 for ERROR_ATTRIB */ + + attrfv_func tabfv[_TNL_MAX_ATTR_CODEGEN+1][4]; /* plus 1 for ERROR_ATTRIB */ + + struct dynfn_lists cache; + struct dynfn_generators gen; + struct tnl_eval eval; GLboolean *edgeflag_tmp; GLboolean have_materials; @@ -714,6 +737,7 @@ typedef struct GLboolean IsolateMaterials; GLboolean AllowVertexFog; GLboolean AllowPixelFog; + GLboolean AllowCodegen; GLboolean _DoVertexFog; /* eval fog function at each vertex? */ diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c index 4f07e2cc7e..1756617f5c 100644 --- a/src/mesa/tnl/t_vtx_api.c +++ b/src/mesa/tnl/t_vtx_api.c @@ -41,9 +41,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "api_arrayelt.h" #include "api_noop.h" #include "t_vtx_api.h" +#include "simple_list.h" +static void reset_attrfv( TNLcontext *tnl ); -static void init_attrfv( TNLcontext *tnl ); +static attrfv_func choose[_TNL_MAX_ATTR_CODEGEN+1][4]; /* +1 for ERROR_ATTRIB */ +static attrfv_func generic_attr_func[_TNL_MAX_ATTR_CODEGEN][4]; /* Close off the last primitive, execute the buffer, restart the @@ -100,7 +103,7 @@ static void _tnl_wrap_buffers( GLcontext *ctx ) /* Deal with buffer wrapping where provoked by the vertex buffer * filling up, as opposed to upgrade_vertex(). 
*/ -static void _tnl_wrap_filled_vertex( GLcontext *ctx ) +void _tnl_wrap_filled_vertex( GLcontext *ctx ) { TNLcontext *tnl = TNL_CONTEXT(ctx); GLfloat *data = tnl->vtx.copied.buffer; @@ -156,7 +159,8 @@ static void _tnl_copy_to_current( GLcontext *ctx ) /* Colormaterial -- this kindof sucks. */ if (ctx->Light.ColorMaterialEnabled) { - _mesa_update_color_material(ctx, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); + _mesa_update_color_material(ctx, + ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); } if (tnl->vtx.have_materials) { @@ -204,7 +208,6 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx, GLfloat *tmp; GLint lastcount = tnl->vtx.initial_counter - tnl->vtx.counter; - /* Run pipeline on current vertices, copy wrapped vertices * to tnl->vtx.copied. */ @@ -222,10 +225,10 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx, * begin/end so that they don't bloat the vertices. */ if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END && - tnl->vtx.attrsz[attr] == 0 - && lastcount > 8 - ) { - init_attrfv( tnl ); + tnl->vtx.attrsz[attr] == 0 && + lastcount > 8 && + tnl->vtx.vertex_size) { + reset_attrfv( tnl ); } /* Fix up sizes: @@ -289,6 +292,19 @@ static void _tnl_wrap_upgrade_vertex( GLcontext *ctx, tnl->vtx.counter -= tnl->vtx.copied.nr; tnl->vtx.copied.nr = 0; } + + /* For codegen - attrptr's may have changed, so need to redo + * codegen. Might be a reasonable place to try & detect attributes + * in the vertex which aren't being submitted any more. 
+ */ + for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) + if (tnl->vtx.attrsz[i]) { + GLuint j = tnl->vtx.attrsz[i] - 1; + + if (i < _TNL_MAX_ATTR_CODEGEN) + tnl->vtx.tabfv[i][j] = choose[i][j]; + } + } @@ -314,146 +330,131 @@ static void _tnl_fixup_vertex( GLcontext *ctx, GLuint attr, GLuint sz ) } +static struct dynfn *lookup( struct dynfn *l, GLuint key ) +{ + struct dynfn *f; + + foreach( f, l ) { + if (f->key == key) + return f; + } + + return 0; +} + + +static attrfv_func do_codegen( GLcontext *ctx, GLuint attr, GLuint sz ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct dynfn *dfn = 0; + if (attr == 0) { + GLuint key = tnl->vtx.vertex_size; + + dfn = lookup( &tnl->vtx.cache.Vertex[sz-1], key ); + + if (!dfn) + dfn = tnl->vtx.gen.Vertex[sz-1]( ctx, key ); + } + else { + GLuint key = (GLuint) tnl->vtx.attrptr[attr]; + + dfn = lookup( &tnl->vtx.cache.Attribute[sz-1], key ); + + if (!dfn) + dfn = tnl->vtx.gen.Attribute[sz-1]( ctx, key ); + } + + if (dfn) + return (attrfv_func) dfn->code; + else + return 0; +} /* Helper function for 'CHOOSE' macro. Do what's necessary when an * entrypoint is called for the first time. */ -static void do_choose( GLuint attr, GLuint sz, - void (*fallback_attr_func)( const GLfloat *), - void (*choose1)( const GLfloat *), - void (*choose2)( const GLfloat *), - void (*choose3)( const GLfloat *), - void (*choose4)( const GLfloat *), - const GLfloat *v ) + +static attrfv_func do_choose( GLuint attr, GLuint sz ) { GET_CURRENT_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint oldsz = tnl->vtx.attrsz[attr]; + + assert(attr < _TNL_MAX_ATTR_CODEGEN); - if (tnl->vtx.attrsz[attr] != sz) + if (oldsz != sz) { + /* Reset any active pointers for this attribute + */ + if (oldsz) + tnl->vtx.tabfv[attr][oldsz-1] = choose[attr][oldsz-1]; + _tnl_fixup_vertex( ctx, attr, sz ); - /* Does this belong here? Necessitates resetting vtxfmt on each - * flush (otherwise flags won't get reset afterwards). 
- */ - if (attr == 0) - ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - else - ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; + /* Does setting NeedFlush belong here? Necessitates resetting + * vtxfmt on each flush (otherwise flags won't get reset + * afterwards). + */ + if (attr == 0) + ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + else + ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; + } - /* Reset any active pointers for this attribute - */ - tnl->vtx.tabfv[attr][0] = choose1; - tnl->vtx.tabfv[attr][1] = choose2; - tnl->vtx.tabfv[attr][2] = choose3; - tnl->vtx.tabfv[attr][3] = choose4; - /* Update the secondary dispatch table with the new function + /* Try to use codegen: + */ + if (tnl->AllowCodegen) + tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz ); + else + tnl->vtx.tabfv[attr][sz-1] = 0; + + /* Else use generic version: */ - tnl->vtx.tabfv[attr][sz-1] = fallback_attr_func; + if (!tnl->vtx.tabfv[attr][sz-1]) + tnl->vtx.tabfv[attr][sz-1] = generic_attr_func[attr][sz-1]; - (*fallback_attr_func)(v); + return tnl->vtx.tabfv[attr][sz-1]; } -/* Versions of all the entrypoints for situations where codegen isn't - * available. - * - * Note: Only one size for each attribute may be active at once. - * Eg. if Color3f is installed/active, then Color4f may not be, even - * if the vertex actually contains 4 color coordinates. This is - * because the 3f version won't otherwise set color[3] to 1.0 -- this - * is the job of the chooser function when switching between Color4f - * and Color3f. 
- */ -#define ATTRFV( ATTR, N ) \ -static void choose_##ATTR##_##N( const GLfloat *v ); \ - \ -static void attrib_##ATTR##_##N( const GLfloat *v ) \ -{ \ - GET_CURRENT_CONTEXT( ctx ); \ - TNLcontext *tnl = TNL_CONTEXT(ctx); \ - \ - if ((ATTR) == 0) { \ - GLuint i; \ - \ - if (N>0) tnl->vtx.vbptr[0] = v[0]; \ - if (N>1) tnl->vtx.vbptr[1] = v[1]; \ - if (N>2) tnl->vtx.vbptr[2] = v[2]; \ - if (N>3) tnl->vtx.vbptr[3] = v[3]; \ - \ - for (i = N; i < tnl->vtx.vertex_size; i++) \ - tnl->vtx.vbptr[i] = tnl->vtx.vertex[i]; \ - \ - tnl->vtx.vbptr += tnl->vtx.vertex_size; \ - \ - if (--tnl->vtx.counter == 0) \ - _tnl_wrap_filled_vertex( ctx ); \ - } \ - else { \ - GLfloat *dest = tnl->vtx.attrptr[ATTR]; \ - if (N>0) dest[0] = v[0]; \ - if (N>1) dest[1] = v[1]; \ - if (N>2) dest[2] = v[2]; \ - if (N>3) dest[3] = v[3]; \ - } \ -} #define CHOOSE( ATTR, N ) \ static void choose_##ATTR##_##N( const GLfloat *v ) \ { \ - do_choose(ATTR, N, \ - attrib_##ATTR##_##N, \ - choose_##ATTR##_1, \ - choose_##ATTR##_2, \ - choose_##ATTR##_3, \ - choose_##ATTR##_4, \ - v ); \ -} - -#define INIT(ATTR) \ -static void init_##ATTR( TNLcontext *tnl ) \ -{ \ - tnl->vtx.tabfv[ATTR][0] = choose_##ATTR##_1; \ - tnl->vtx.tabfv[ATTR][1] = choose_##ATTR##_2; \ - tnl->vtx.tabfv[ATTR][2] = choose_##ATTR##_3; \ - tnl->vtx.tabfv[ATTR][3] = choose_##ATTR##_4; \ + attrfv_func f = do_choose(ATTR, N); \ + f( v ); \ } - -#define ATTRS( ATTRIB ) \ - ATTRFV( ATTRIB, 1 ) \ - ATTRFV( ATTRIB, 2 ) \ - ATTRFV( ATTRIB, 3 ) \ - ATTRFV( ATTRIB, 4 ) \ +#define CHOOSERS( ATTRIB ) \ CHOOSE( ATTRIB, 1 ) \ CHOOSE( ATTRIB, 2 ) \ CHOOSE( ATTRIB, 3 ) \ CHOOSE( ATTRIB, 4 ) \ - INIT( ATTRIB ) \ -/* Generate a lot of functions. These are the actual worker - * functions, which are equivalent to those generated via codegen - * elsewhere. 
- */ -ATTRS( 0 ) -ATTRS( 1 ) -ATTRS( 2 ) -ATTRS( 3 ) -ATTRS( 4 ) -ATTRS( 5 ) -ATTRS( 6 ) -ATTRS( 7 ) -ATTRS( 8 ) -ATTRS( 9 ) -ATTRS( 10 ) -ATTRS( 11 ) -ATTRS( 12 ) -ATTRS( 13 ) -ATTRS( 14 ) -ATTRS( 15 ) - +#define INIT_CHOOSERS(ATTR) \ + choose[ATTR][0] = choose_##ATTR##_1; \ + choose[ATTR][1] = choose_##ATTR##_2; \ + choose[ATTR][2] = choose_##ATTR##_3; \ + choose[ATTR][3] = choose_##ATTR##_4; + +CHOOSERS( 0 ) +CHOOSERS( 1 ) +CHOOSERS( 2 ) +CHOOSERS( 3 ) +CHOOSERS( 4 ) +CHOOSERS( 5 ) +CHOOSERS( 6 ) +CHOOSERS( 7 ) +CHOOSERS( 8 ) +CHOOSERS( 9 ) +CHOOSERS( 10 ) +CHOOSERS( 11 ) +CHOOSERS( 12 ) +CHOOSERS( 13 ) +CHOOSERS( 14 ) +CHOOSERS( 15 ) static void error_attrib( const GLfloat *unused ) { @@ -462,326 +463,25 @@ static void error_attrib( const GLfloat *unused ) _mesa_error( ctx, GL_INVALID_ENUM, "glVertexAttrib" ); } -static void init_error_attrib( TNLcontext *tnl ) -{ - tnl->vtx.tabfv[ERROR_ATTRIB][0] = error_attrib; - tnl->vtx.tabfv[ERROR_ATTRIB][1] = error_attrib; - tnl->vtx.tabfv[ERROR_ATTRIB][2] = error_attrib; - tnl->vtx.tabfv[ERROR_ATTRIB][3] = error_attrib; -} - -static void init_attrfv( TNLcontext *tnl ) +static void reset_attrfv( TNLcontext *tnl ) { - if (tnl->vtx.vertex_size) { - GLuint i; - - init_0( tnl ); - init_1( tnl ); - init_2( tnl ); - init_3( tnl ); - init_4( tnl ); - init_5( tnl ); - init_6( tnl ); - init_7( tnl ); - init_8( tnl ); - init_9( tnl ); - init_10( tnl ); - init_11( tnl ); - init_12( tnl ); - init_13( tnl ); - init_14( tnl ); - init_15( tnl ); - init_error_attrib( tnl ); - - for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) - tnl->vtx.attrsz[i] = 0; - - tnl->vtx.vertex_size = 0; - tnl->vtx.have_materials = 0; - } -} - -/* These can be made efficient with codegen. Further, by adding more - * logic to do_choose(), the double-dispatch for legacy entrypoints - * like glVertex3f() can be removed. 
- */ -#define DISPATCH_ATTRFV( ATTR, COUNT, P ) \ -do { \ - GET_CURRENT_CONTEXT( ctx ); \ - TNLcontext *tnl = TNL_CONTEXT(ctx); \ - tnl->vtx.tabfv[ATTR][COUNT-1]( P ); \ -} while (0) - -#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V ) -#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V ) -#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V ) -#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V ) - -#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) - -#define DISPATCH_ATTR2F( ATTR, S,T ) \ -do { \ - GLfloat v[2]; \ - v[0] = S; v[1] = T; \ - DISPATCH_ATTR2FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR3F( ATTR, S,T,R ) \ -do { \ - GLfloat v[3]; \ - v[0] = S; v[1] = T; v[2] = R; \ - DISPATCH_ATTR3FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) \ -do { \ - GLfloat v[4]; \ - v[0] = S; v[1] = T; v[2] = R; v[3] = Q; \ - DISPATCH_ATTR4FV( ATTR, v ); \ -} while (0) - - -static void enum_error( void ) -{ - GET_CURRENT_CONTEXT( ctx ); - _mesa_error( ctx, GL_INVALID_ENUM, "glVertexAttrib" ); -} - -static void GLAPIENTRY _tnl_Vertex2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( _TNL_ATTRIB_POS, x, y ); -} - -static void GLAPIENTRY _tnl_Vertex2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( _TNL_ATTRIB_POS, v ); -} - -static void GLAPIENTRY _tnl_Vertex3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( _TNL_ATTRIB_POS, x, y, z ); -} - -static void GLAPIENTRY _tnl_Vertex3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( _TNL_ATTRIB_POS, v ); -} - -static void GLAPIENTRY _tnl_Vertex4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( _TNL_ATTRIB_POS, x, y, z, w ); -} - -static void GLAPIENTRY _tnl_Vertex4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( _TNL_ATTRIB_POS, v ); -} - -static void GLAPIENTRY _tnl_TexCoord1f( GLfloat x ) -{ - DISPATCH_ATTR1F( _TNL_ATTRIB_TEX0, x ); -} - -static void GLAPIENTRY _tnl_TexCoord1fv( const GLfloat *v ) -{ - 
DISPATCH_ATTR1FV( _TNL_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY _tnl_TexCoord2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( _TNL_ATTRIB_TEX0, x, y ); -} - -static void GLAPIENTRY _tnl_TexCoord2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( _TNL_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY _tnl_TexCoord3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( _TNL_ATTRIB_TEX0, x, y, z ); -} - -static void GLAPIENTRY _tnl_TexCoord3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( _TNL_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY _tnl_TexCoord4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( _TNL_ATTRIB_TEX0, x, y, z, w ); -} - -static void GLAPIENTRY _tnl_TexCoord4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( _TNL_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY _tnl_Normal3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( _TNL_ATTRIB_NORMAL, x, y, z ); -} - -static void GLAPIENTRY _tnl_Normal3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( _TNL_ATTRIB_NORMAL, v ); -} - -static void GLAPIENTRY _tnl_FogCoordfEXT( GLfloat x ) -{ - DISPATCH_ATTR1F( _TNL_ATTRIB_FOG, x ); -} - -static void GLAPIENTRY _tnl_FogCoordfvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR1FV( _TNL_ATTRIB_FOG, v ); -} - -static void GLAPIENTRY _tnl_Color3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( _TNL_ATTRIB_COLOR0, x, y, z ); -} - -static void GLAPIENTRY _tnl_Color3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( _TNL_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY _tnl_Color4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( _TNL_ATTRIB_COLOR0, x, y, z, w ); -} - -static void GLAPIENTRY _tnl_Color4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( _TNL_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY _tnl_SecondaryColor3fEXT( GLfloat x, GLfloat y, - GLfloat z ) -{ - DISPATCH_ATTR3F( _TNL_ATTRIB_COLOR1, x, y, z ); -} - -static void GLAPIENTRY _tnl_SecondaryColor3fvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( _TNL_ATTRIB_COLOR1, v ); -} 
- -static void GLAPIENTRY _tnl_MultiTexCoord1f( GLenum target, GLfloat x ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR1F( attr, x ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord1fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR1FV( attr, v ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord2f( GLenum target, GLfloat x, - GLfloat y ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR2F( attr, x, y ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord2fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR2FV( attr, v ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord3f( GLenum target, GLfloat x, - GLfloat y, GLfloat z) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR3F( attr, x, y, z ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord3fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR3FV( attr, v ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord4f( GLenum target, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR4F( attr, x, y, z, w ); -} - -static void GLAPIENTRY _tnl_MultiTexCoord4fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + _TNL_ATTRIB_TEX0; - DISPATCH_ATTR4FV( attr, v ); -} - -static void GLAPIENTRY _tnl_VertexAttrib1fNV( GLuint index, GLfloat x ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR1F( index, x ); -} - -static void GLAPIENTRY _tnl_VertexAttrib1fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR1FV( index, v ); -} - -static void GLAPIENTRY _tnl_VertexAttrib2fNV( GLuint index, GLfloat x, - GLfloat y ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR2F( index, x, y ); -} - -static void GLAPIENTRY 
_tnl_VertexAttrib2fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR2FV( index, v ); -} - -static void GLAPIENTRY _tnl_VertexAttrib3fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR3F( index, x, y, z ); -} + GLuint i; -static void GLAPIENTRY _tnl_VertexAttrib3fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR3FV( index, v ); -} + for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) + if (tnl->vtx.attrsz[i]) { + GLuint j = tnl->vtx.attrsz[i] - 1; + tnl->vtx.attrsz[i] = 0; -static void GLAPIENTRY _tnl_VertexAttrib4fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR4F( index, x, y, z, w ); -} + if (i < _TNL_MAX_ATTR_CODEGEN) + tnl->vtx.tabfv[i][j] = choose[i][j]; + } -static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= VERT_ATTRIB_MAX) index = ERROR_ATTRIB; - DISPATCH_ATTR4FV( index, v ); + tnl->vtx.vertex_size = 0; + tnl->vtx.have_materials = 0; } + /* Materials: @@ -797,35 +497,33 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index, * * There is no aliasing of material attributes with other entrypoints. 
*/ -#define MAT_ATTR( A, N, params ) \ +#define OTHER_ATTR( A, N, params ) \ do { \ if (tnl->vtx.attrsz[A] != N) { \ _tnl_fixup_vertex( ctx, A, N ); \ - tnl->vtx.have_materials = GL_TRUE; \ } \ \ { \ GLfloat *dest = tnl->vtx.attrptr[A]; \ - if (N>0) dest[0] = params[0]; \ - if (N>1) dest[1] = params[1]; \ - if (N>2) dest[2] = params[2]; \ - if (N>3) dest[3] = params[3]; \ + if (N>0) dest[0] = (params)[0]; \ + if (N>1) dest[1] = (params)[1]; \ + if (N>2) dest[2] = (params)[2]; \ + if (N>3) dest[3] = (params)[3]; \ ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \ } \ } while (0) -#define MAT( ATTR, N, face, params ) \ -do { \ - if (face != GL_BACK) \ - MAT_ATTR( ATTR, N, params ); /* front */ \ - if (face != GL_FRONT) \ - MAT_ATTR( ATTR + 1, N, params ); /* back */ \ +#define MAT( ATTR, N, face, params ) \ +do { \ + if (face != GL_BACK) \ + OTHER_ATTR( ATTR, N, params ); /* front */ \ + if (face != GL_FRONT) \ + OTHER_ATTR( ATTR + 1, N, params ); /* back */ \ } while (0) -/* NOTE: Have to remove/deal-with colormaterial crossovers, probably - * later on - in the meantime just store everything. +/* Colormaterial is dealt with later on. 
*/ static void GLAPIENTRY _tnl_Materialfv( GLenum face, GLenum pname, const GLfloat *params ) @@ -871,44 +569,43 @@ static void GLAPIENTRY _tnl_Materialfv( GLenum face, GLenum pname, _mesa_error( ctx, GL_INVALID_ENUM, "glMaterialfv" ); return; } -} - -#define IDX_ATTR( A, IDX ) \ -do { \ - GET_CURRENT_CONTEXT( ctx ); \ - TNLcontext *tnl = TNL_CONTEXT(ctx); \ - \ - if (tnl->vtx.attrsz[A] != 1) { \ - _tnl_fixup_vertex( ctx, A, 1 ); \ - } \ - \ - { \ - GLfloat *dest = tnl->vtx.attrptr[A]; \ - dest[0] = IDX; \ - ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \ - } \ -} while (0) + tnl->vtx.have_materials = GL_TRUE; +} static void GLAPIENTRY _tnl_EdgeFlag( GLboolean b ) { - IDX_ATTR( _TNL_ATTRIB_EDGEFLAG, (GLfloat)b ); + GET_CURRENT_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLfloat f = (GLfloat)b; + + OTHER_ATTR( _TNL_ATTRIB_EDGEFLAG, 1, &f ); } static void GLAPIENTRY _tnl_EdgeFlagv( const GLboolean *v ) { - IDX_ATTR( _TNL_ATTRIB_EDGEFLAG, (GLfloat)v[0] ); + GET_CURRENT_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLfloat f = (GLfloat)v[0]; + + OTHER_ATTR( _TNL_ATTRIB_EDGEFLAG, 1, &f ); } static void GLAPIENTRY _tnl_Indexf( GLfloat f ) { - IDX_ATTR( _TNL_ATTRIB_INDEX, f ); + GET_CURRENT_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + OTHER_ATTR( _TNL_ATTRIB_INDEX, 1, &f ); } static void GLAPIENTRY _tnl_Indexfv( const GLfloat *v ) { - IDX_ATTR( _TNL_ATTRIB_INDEX, v[0] ); + GET_CURRENT_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + OTHER_ATTR( _TNL_ATTRIB_INDEX, 1, v ); } /* Eval @@ -1029,7 +726,8 @@ static void GLAPIENTRY _tnl_Begin( GLenum mode ) if (ctx->NewState) { _mesa_update_state( ctx ); - if (!(tnl->Driver.NotifyBegin && tnl->Driver.NotifyBegin( ctx, mode ))) + if (!(tnl->Driver.NotifyBegin && + tnl->Driver.NotifyBegin( ctx, mode ))) ctx->Exec->Begin(mode); return; } @@ -1086,14 +784,11 @@ static void GLAPIENTRY _tnl_End( void ) static void _tnl_exec_vtxfmt_init( GLcontext *ctx ) { GLvertexformat *vfmt = 
&(TNL_CONTEXT(ctx)->exec_vtxfmt); + vfmt->ArrayElement = _ae_loopback_array_elt; /* generic helper */ vfmt->Begin = _tnl_Begin; vfmt->CallList = _mesa_CallList; vfmt->CallLists = _mesa_CallLists; - vfmt->Color3f = _tnl_Color3f; - vfmt->Color3fv = _tnl_Color3fv; - vfmt->Color4f = _tnl_Color4f; - vfmt->Color4fv = _tnl_Color4fv; vfmt->EdgeFlag = _tnl_EdgeFlag; vfmt->EdgeFlagv = _tnl_EdgeFlagv; vfmt->End = _tnl_End; @@ -1103,45 +798,9 @@ static void _tnl_exec_vtxfmt_init( GLcontext *ctx ) vfmt->EvalCoord2fv = _tnl_EvalCoord2fv; vfmt->EvalPoint1 = _tnl_EvalPoint1; vfmt->EvalPoint2 = _tnl_EvalPoint2; - vfmt->FogCoordfEXT = _tnl_FogCoordfEXT; - vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT; vfmt->Indexf = _tnl_Indexf; vfmt->Indexfv = _tnl_Indexfv; vfmt->Materialfv = _tnl_Materialfv; - vfmt->MultiTexCoord1fARB = _tnl_MultiTexCoord1f; - vfmt->MultiTexCoord1fvARB = _tnl_MultiTexCoord1fv; - vfmt->MultiTexCoord2fARB = _tnl_MultiTexCoord2f; - vfmt->MultiTexCoord2fvARB = _tnl_MultiTexCoord2fv; - vfmt->MultiTexCoord3fARB = _tnl_MultiTexCoord3f; - vfmt->MultiTexCoord3fvARB = _tnl_MultiTexCoord3fv; - vfmt->MultiTexCoord4fARB = _tnl_MultiTexCoord4f; - vfmt->MultiTexCoord4fvARB = _tnl_MultiTexCoord4fv; - vfmt->Normal3f = _tnl_Normal3f; - vfmt->Normal3fv = _tnl_Normal3fv; - vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT; - vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; - vfmt->TexCoord1f = _tnl_TexCoord1f; - vfmt->TexCoord1fv = _tnl_TexCoord1fv; - vfmt->TexCoord2f = _tnl_TexCoord2f; - vfmt->TexCoord2fv = _tnl_TexCoord2fv; - vfmt->TexCoord3f = _tnl_TexCoord3f; - vfmt->TexCoord3fv = _tnl_TexCoord3fv; - vfmt->TexCoord4f = _tnl_TexCoord4f; - vfmt->TexCoord4fv = _tnl_TexCoord4fv; - vfmt->Vertex2f = _tnl_Vertex2f; - vfmt->Vertex2fv = _tnl_Vertex2fv; - vfmt->Vertex3f = _tnl_Vertex3f; - vfmt->Vertex3fv = _tnl_Vertex3fv; - vfmt->Vertex4f = _tnl_Vertex4f; - vfmt->Vertex4fv = _tnl_Vertex4fv; - vfmt->VertexAttrib1fNV = _tnl_VertexAttrib1fNV; - vfmt->VertexAttrib1fvNV = 
_tnl_VertexAttrib1fvNV; - vfmt->VertexAttrib2fNV = _tnl_VertexAttrib2fNV; - vfmt->VertexAttrib2fvNV = _tnl_VertexAttrib2fvNV; - vfmt->VertexAttrib3fNV = _tnl_VertexAttrib3fNV; - vfmt->VertexAttrib3fvNV = _tnl_VertexAttrib3fvNV; - vfmt->VertexAttrib4fNV = _tnl_VertexAttrib4fNV; - vfmt->VertexAttrib4fvNV = _tnl_VertexAttrib4fvNV; vfmt->Rectf = _mesa_noop_Rectf; vfmt->EvalMesh1 = _mesa_noop_EvalMesh1; @@ -1161,13 +820,9 @@ void _tnl_FlushVertices( GLcontext *ctx, GLuint flags ) _tnl_flush_vtx( ctx ); } - { + if (tnl->vtx.vertex_size) { _tnl_copy_to_current( ctx ); - - /* reset attrfv table - */ - init_attrfv( tnl ); - flags |= FLUSH_UPDATE_CURRENT; + reset_attrfv( tnl ); } ctx->Driver.NeedFlush = 0; @@ -1191,26 +846,92 @@ static void _tnl_current_init( GLcontext *ctx ) tnl->vtx.current[_TNL_ATTRIB_INDEX] = &ctx->Current.Index; } +static struct dynfn *no_codegen( GLcontext *ctx, int key ) +{ + return 0; +} void _tnl_vtx_init( GLcontext *ctx ) { TNLcontext *tnl = TNL_CONTEXT(ctx); struct tnl_vertex_arrays *tmp = &tnl->vtx_inputs; GLuint i; + static int firsttime = 1; + + if (firsttime) { + firsttime = 0; + + INIT_CHOOSERS( 0 ); + INIT_CHOOSERS( 1 ); + INIT_CHOOSERS( 2 ); + INIT_CHOOSERS( 3 ); + INIT_CHOOSERS( 4 ); + INIT_CHOOSERS( 5 ); + INIT_CHOOSERS( 6 ); + INIT_CHOOSERS( 7 ); + INIT_CHOOSERS( 8 ); + INIT_CHOOSERS( 9 ); + INIT_CHOOSERS( 10 ); + INIT_CHOOSERS( 11 ); + INIT_CHOOSERS( 12 ); + INIT_CHOOSERS( 13 ); + INIT_CHOOSERS( 14 ); + INIT_CHOOSERS( 15 ); + + choose[ERROR_ATTRIB][0] = error_attrib; + choose[ERROR_ATTRIB][1] = error_attrib; + choose[ERROR_ATTRIB][2] = error_attrib; + choose[ERROR_ATTRIB][3] = error_attrib; + + _tnl_generic_attr_table_init( generic_attr_func ); + } for (i = 0; i < _TNL_ATTRIB_INDEX; i++) _mesa_vector4f_init( &tmp->Attribs[i], 0, 0); + for (i = 0; i < 4; i++) { + make_empty_list( &tnl->vtx.cache.Vertex[i] ); + make_empty_list( &tnl->vtx.cache.Attribute[i] ); + tnl->vtx.gen.Vertex[i] = no_codegen; + tnl->vtx.gen.Attribute[i] = no_codegen; 
+ } + + _tnl_InitX86Codegen( &tnl->vtx.gen ); + _tnl_current_init( ctx ); _tnl_exec_vtxfmt_init( ctx ); + _tnl_generic_exec_vtxfmt_init( ctx ); _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt ); - tnl->vtx.vertex_size = 1; init_attrfv( tnl ); + + memcpy( tnl->vtx.tabfv, choose, sizeof(choose) ); + + for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) + tnl->vtx.attrsz[i] = 0; + + tnl->vtx.vertex_size = 0; + tnl->vtx.have_materials = 0; } +static void free_funcs( struct dynfn *l ) +{ + struct dynfn *f, *tmp; + foreach_s (f, tmp, l) { + remove_from_list( f ); + ALIGN_FREE( f->code ); + FREE( f ); + } +} void _tnl_vtx_destroy( GLcontext *ctx ) { + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint i; + + for (i = 0; i < 4; i++) { + free_funcs( &tnl->vtx.cache.Vertex[i] ); + free_funcs( &tnl->vtx.cache.Attribute[i] ); + } } diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h index 2500320213..5e4ab71a08 100644 --- a/src/mesa/tnl/t_vtx_api.h +++ b/src/mesa/tnl/t_vtx_api.h @@ -37,12 +37,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "t_context.h" +#define ERROR_ATTRIB 16 + + + +/* t_vtx_api.c: + */ extern void _tnl_vtx_init( GLcontext *ctx ); extern void _tnl_vtx_destroy( GLcontext *ctx ); extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags ); extern void _tnl_flush_vtx( GLcontext *ctx ); +extern void _tnl_wrap_filled_vertex( GLcontext *ctx ); + +/* t_vtx_exec.c: + */ + extern void _tnl_do_EvalCoord2f( GLcontext* ctx, GLfloat u, GLfloat v ); extern void _tnl_do_EvalCoord1f(GLcontext* ctx, GLfloat u); extern void _tnl_update_eval( GLcontext *ctx ); @@ -55,4 +66,19 @@ extern GLboolean *_tnl_translate_edgeflag( GLcontext *ctx, extern GLboolean *_tnl_import_current_edgeflag( GLcontext *ctx, GLuint count ); + + +/* t_vtx_generic.c: + */ +extern void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx ); + +extern void _tnl_generic_attr_table_init( attrfv_func (*tab)[4] ); + +/* t_vtx_x86.c: + */ +extern void _tnl_InitX86Codegen( struct dynfn_generators *gen ); + + + + #endif diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S index 3a78838b67..dcaca47160 100644 --- a/src/mesa/tnl/t_vtx_x86_gcc.S +++ b/src/mesa/tnl/t_vtx_x86_gcc.S @@ -25,9 +25,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ - +/* + * Authors: + * Keith Whitwell + */ - #define GLOBL( x ) \ .globl x; \ @@ -36,113 +38,127 @@ x: .data .align 4 +// Someone who knew a lot about this sort of thing would use this +// macro to note current offsets, etc in a special region of the +// object file & just make everything work out neat. I don't know +// enough to do that... 
+#define SUBST( x ) (0x10101010 + x) + + GLOBL ( _x86_Vertex1fv ) - ;; v already in eax + movl 4(%esp), %ecx push %edi push %esi - movl (0x0), %edi ; load vbptr - movl (%eax), %edx ; load v[0] - movl %edx, (%edi) ; vbptr[0] = v[0] - addl $4, %edi ; vbptr += 1 - movl $0x0, %ecx ; vertex_size - 1 - movl $0x0, %esi ; tnl->vtx.vertex + 1 + movl SUBST(0), %edi // 0x0 --> tnl->vtx.vbptr + movl (%ecx), %edx // load v[0] + movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] + addl $4, %edi // tnl->vtx.vbptr += 1 + movl $SUBST(1), %ecx // 0x1 --> (tnl->vtx.vertex_size - 1) + movl $SUBST(2), %esi // 0x2 --> (tnl->vtx.vertex + 1) repz - movsl %ds:(%esi), %es:(%edi) - movl %edi, (0) ; save vbptr - movl (0), %edx ; load counter + movsl %ds:(%esi), %es:(%edi) + movl %edi, SUBST(0) // 0x0 --> tnl->vtx.vbptr + movl SUBST(3), %edx // 0x3 --> counter pop %esi pop %edi - dec %edx ; counter-- - movl %edx, (0) ; save counter - je .5 ; if (counter != 0) - ret ; return -.5: jmp *0 ; else notify(); + dec %edx // counter-- + movl %edx, SUBST(3) // 0x3 --> counter + je .5 // if (counter != 0) + ret // return +.5: mov $SUBST(4), %eax // else notify() + jmp *%eax // jmp $0x10101014 doesn't seem to work GLOBL ( _x86_Vertex1fv_end ) +.align 4 GLOBL ( _x86_Vertex2fv ) - ;; v already in eax + movl 4(%esp), %ecx push %edi push %esi - movl (0x0), %edi ; load vbptr - movl (%eax), %edx ; load v[0] - movl 4(%eax), %ecx ; load v[1] - movl %edx, (%edi) ; vbptr[0] = v[0] - movl %ecx, 4(%edi) ; vbptr[1] = v[1] - addl $8, %edi ; vbptr += 2 - movl $0x0, %ecx ; vertex_size - 2 - movl $0x0, %esi ; tnl->vtx.vertex + 2 + movl SUBST(0), %edi // load tnl->vtx.vbptr + movl (%ecx), %edx // load v[0] + movl 4(%ecx), %eax // load v[1] + movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] + addl $8, %edi // tnl->vtx.vbptr += 2 + movl $SUBST(1), %ecx // vertex_size - 2 + movl $SUBST(2), %esi // tnl->vtx.vertex + 2 repz movsl %ds:(%esi), %es:(%edi) - movl %edi, (0) ; save vbptr - 
movl (0), %edx ; load counter + movl %edi, SUBST(0) // save tnl->vtx.vbptr + movl SUBST(3), %edx // load counter pop %esi pop %edi - dec %edx ; counter-- - movl %edx, (0) ; save counter - je .6 ; if (counter != 0) - ret ; return -.6: jmp *0 ; else notify(); -GLOBL ( _x86_Vertex3fv_end ) + dec %edx // counter-- + movl %edx, SUBST(3) // save counter + je .6 // if (counter != 0) + ret // return +.6: mov $SUBST(4), %eax // else notify() + jmp *%eax // jmp $0x10101014 doesn't seem to work +GLOBL ( _x86_Vertex2fv_end ) +.align 4 GLOBL ( _x86_Vertex3fv ) - ;; v already in eax + movl 4(%esp), %ecx push %edi push %esi - movl (0x0), %edi ; load vbptr - movl (%eax), %edx ; load v[0] - movl 4(%eax), %ecx ; load v[1] - movl 8(%eax), %esi ; load v[2] - movl %edx, (%edi) ; vbptr[0] = v[0] - movl %ecx, 4(%edi) ; vbptr[1] = v[1] - movl %esi, 8(%edi) ; vbptr[2] = v[2] - addl $12, %edi ; vbptr += 3 - movl $0x0, %ecx ; vertex_size - 3 - movl $0x0, %esi ; tnl->vtx.vertex + 3 + movl SUBST(0), %edi // load tnl->vtx.vbptr + movl (%ecx), %edx // load v[0] + movl 4(%ecx), %eax // load v[1] + movl 8(%ecx), %esi // load v[2] + movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] + movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2] + addl $12, %edi // tnl->vtx.vbptr += 3 + movl $SUBST(1), %ecx // vertex_size - 3 + movl $SUBST(2), %esi // tnl->vtx.vertex + 3 repz movsl %ds:(%esi), %es:(%edi) - movl %edi, (0) ; save vbptr - movl (0), %edx ; load counter + movl %edi, SUBST(0) // save tnl->vtx.vbptr + movl SUBST(3), %edx // load counter pop %esi pop %edi - dec %edx ; counter-- - movl %edx, (0) ; save counter - je .7 ; if (counter != 0) - ret ; return -.7: jmp *0 ; else notify(); + dec %edx // counter-- + movl %edx, SUBST(3) // save counter + je .7 // if (counter != 0) + ret // return +.7: mov $SUBST(4), %eax // else notify() + jmp *%eax // jmp $0x10101014 doesn't seem to work GLOBL ( _x86_Vertex3fv_end ) +.align 4 GLOBL ( _x86_Vertex4fv ) - ;; v already in eax 
+ movl 4(%esp), %ecx push %edi push %esi - movl (0x0), %edi ; load vbptr - movl (%eax), %edx ; load v[0] - movl 4(%eax), %ecx ; load v[1] - movl 8(%eax), %esi ; load v[2] - movl %edx, (%edi) ; vbptr[0] = v[0] - movl %ecx, 4(%edi) ; vbptr[1] = v[1] - movl %esi, 8(%edi) ; vbptr[2] = v[2] - movl 12(%eax), %esi ; load v[3] - movl %esi, 12(%edi) ; vbptr[3] = v[3] - addl $16, %edi ; vbptr += 4 - movl $0x0, %ecx ; vertex_size - 4 - movl $0x0, %esi ; tnl->vtx.vertex + 3 + movl SUBST(0), %edi // load tnl->vtx.vbptr + movl (%ecx), %edx // load v[0] + movl 4(%ecx), %eax // load v[1] + movl 8(%ecx), %esi // load v[2] + movl 12(%ecx), %ecx // load v[3] + movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] + movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2] + movl %ecx, 12(%edi) // tnl->vtx.vbptr[3] = v[3] + addl $16, %edi // tnl->vtx.vbptr += 4 + movl $SUBST(1), %ecx // vertex_size - 4 + movl $SUBST(2), %esi // tnl->vtx.vertex + 4 repz - movsl %ds:(%esi), %es:(%edi) - movl %edi, (0) ; save vbptr - movl (0), %edx ; load counter + movsl %ds:(%esi), %es:(%edi) + movl %edi, SUBST(0) // save tnl->vtx.vbptr + movl SUBST(3), %edx // load counter pop %esi pop %edi - dec %edx ; counter-- - movl %edx, (0) ; save counter - je .6 ; if (counter != 0) - ret ; return -.6: jmp *0 ; else notify(); -GLOBL ( _x86_Vertex3fv_end ) + dec %edx // counter-- + movl %edx, SUBST(3) // save counter + je .8 // if (counter != 0) + ret // return +.8: mov $SUBST(4), %eax // else notify() + jmp *%eax // jmp $0x10101014 doesn't seem to work +GLOBL ( _x86_Vertex4fv_end ) @@ -151,92 +167,151 @@ GLOBL ( _x86_Vertex3fv_end ) */ GLOBL( _x86_Attribute1fv) - /* 'v' is already in eax */ - movl (%eax), %ecx /* load v[0] */ - movl %ecx, 0 /* store v[0] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ ret -GLOBL ( _x86_Attribute2fv_end ) +GLOBL ( _x86_Attribute1fv_end ) GLOBL( _x86_Attribute2fv) -
/* 'v' is already in eax */ - movl (%eax), %ecx /* load v[0] */ - movl 4(%eax), %eax /* load v[1] */ - movl %ecx, 0 /* store v[0] to current vertex */ - movl %eax, 4 /* store v[1] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ ret GLOBL ( _x86_Attribute2fv_end ) GLOBL( _x86_Attribute3fv) - /* 'v' is already in eax */ - movl (%eax), %ecx /* load v[0] */ - movl 4(%eax), %edx /* load v[1] */ - movl 8(%eax), %eax /* load v[2] */ - movl %ecx, 0 /* store v[0] to current vertex */ - movl %edx, 4 /* store v[1] to current vertex */ - movl %eax, 8 /* store v[2] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl 8(%ecx), %ecx /* load v[2] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ + movl %ecx, SUBST(2) /* store v[2] to current vertex */ ret GLOBL ( _x86_Attribute3fv_end ) GLOBL( _x86_Attribute4fv) - /* 'v' is already in eax */ - movl (%eax), %ecx /* load v[0] */ - movl 4(%eax), %edx /* load v[1] */ - movl %ecx, 0 /* store v[0] to current vertex */ - movl %edx, 4 /* store v[1] to current vertex */ - movl 8(%eax), %ecx /* load v[2] */ - movl 12(%eax), %edx /* load v[3] */ - movl %ecx, 8 /* store v[2] to current vertex */ - movl %edx, 12 /* store v[3] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ + movl 8(%ecx), %eax /* load v[2] */ + movl 12(%ecx), %edx /* load v[3] */ + movl %eax, SUBST(2) /* store v[2] to current vertex */ + movl %edx, SUBST(3) /* store v[3] to current vertex */ ret -GLOBL ( _x86_Attribute3fv_end ) +GLOBL ( _x86_Attribute4fv_end ) + + +// Choosers: + +// Must 
generate all of these ahead of first usage. Generate at +// compile-time? + +// NOT CURRENTLY USED -;;; In the 1st level dispatch functions, switch to a different -;;; calling convention -- (const GLfloat *v) in %eax. -;;; -;;; As with regular (x86) dispatch, don't create a new stack frame - -;;; just let the 'ret' in the dispatched function return straight -;;; back to the original caller. +GLOBL( _x86_choose_fv) + subl $12, %esp // gcc does 16 byte alignment of stack frames? + movl $SUBST(0), (%esp) // arg 0 - attrib + movl $SUBST(1), 4(%esp) // arg 1 - N + call _do_choose // new function returned in %eax + add $12, %esp // tear down stack frame + jmp *%eax // jump to new func +GLOBL ( _x86_choosefv_end ) + + +// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch. -;;; Vertex/Normal/Color, etc: the address of the function pointer -;;; is known at codegen time. + +// NOT CURRENTLY USED + + +// In the 1st level dispatch functions, switch to a different +// calling convention -- (const GLfloat *v) in %ecx. +// +// As with regular (x86) dispatch, don't create a new stack frame - +// just let the 'ret' in the dispatched function return straight +// back to the original caller. + + + +// Vertex/Normal/Color, etc: the address of the function pointer +// is known at codegen time. + + +// Unfortunately, have to play with the stack in the non-fv case: +// GLOBL( _x86_dispatch_attrf ) - leal 4(%esp), %eax - jmp *foo + subl $12, %esp // gcc does 16 byte alignment of stack frames? 
+ leal 16(%esp), %edx // address of first float on stack + movl %edx, (%esp) // save as 'v' + call SUBST(0) // 0x0 --> tabfv[attr][n] + addl $12, %esp // tear down frame + ret // return GLOBL( _x86_dispatch_attrf_end ) +// The fv case is simpler: +// GLOBL( _x86_dispatch_attrfv ) - movl 4(%esp), %eax - jmp *foo -GLOBL( _x86_dispatch_attr1f_end ) + jmp SUBST(0) // 0x0 --> tabfv[attr][n] +GLOBL( _x86_dispatch_attrfv_end ) -;;; MultiTexcoord: the address of the function pointer must be -;;; calculated. - + +// MultiTexcoord: the address of the function pointer must be +// calculated, but can use the index argument slot to hold 'v', and +// avoid setting up a new stack frame. + +// Also, will only need a maximum of four of each of these per context: +// GLOBL( _x86_dispatch_multitexcoordf ) - leal 4(%esp), %eax - jmp *foo + movl 4(%esp), %ecx + leal 8(%esp), %edx + andl $7, %ecx + movl %edx, 4(%esp) + sall $4, %ecx + jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n] GLOBL( _x86_dispatch_multitexcoordf_end ) GLOBL( _x86_dispatch_multitexcoordfv ) - movl 4(%esp), %eax - jmp *foo + movl 4(%esp), %ecx + movl 8(%esp), %edx + andl $7, %ecx + movl %edx, 4(%esp) + sall $4, %ecx + jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n] GLOBL( _x86_dispatch_multitexcoordfv_end ) -;;; VertexAttrib: the address of the function pointer must be -;;; calculated. +// VertexAttrib: the address of the function pointer must be +// calculated. 
GLOBL( _x86_dispatch_vertexattribf ) - leal 4(%esp), %eax - jmp *foo + movl $16, %ecx + movl 4(%esp), %eax + cmpl $16, %eax + cmovae %ecx, %eax // unsigned clamp: index >= 16 --> 16 (error slot) + leal 8(%esp), %ecx // calculate 'v' + movl %ecx, 4(%esp) // save in 1st arg slot + sall $4, %eax + jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n] GLOBL( _x86_dispatch_vertexattribf_end ) GLOBL( _x86_dispatch_vertexattribfv ) + movl $16, %ecx movl 4(%esp), %eax - jmp *foo + cmpl $16, %eax + cmovae %ecx, %eax // unsigned clamp: index >= 16 --> 16 (error slot) + movl 8(%esp), %ecx // load 'v' + movl %ecx, 4(%esp) // save in 1st arg slot + sall $4, %eax + jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n] GLOBL( _x86_dispatch_vertexattribfv_end ) - \ No newline at end of file + -- cgit v1.2.3