diff options
Diffstat (limited to 'src/mesa/tnl')
| -rw-r--r-- | src/mesa/tnl/t_save_api.c | 10 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_api.c | 4 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_api.h | 4 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_generic.c | 14 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_x86.c | 90 | ||||
| -rw-r--r-- | src/mesa/tnl/t_vtx_x86_gcc.S | 124 | 
6 files changed, 171 insertions, 75 deletions
diff --git a/src/mesa/tnl/t_save_api.c b/src/mesa/tnl/t_save_api.c index db338bbef6..18fa46951d 100644 --- a/src/mesa/tnl/t_save_api.c +++ b/src/mesa/tnl/t_save_api.c @@ -712,6 +712,15 @@ do {						\  #define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) +#ifdef USE_X86_ASM +/* Naughty cheat: + */ +#define DISPATCH_ATTR2F( ATTR, S,T ) DISPATCH_ATTRFV( ATTR, 2, &(S) ) +#define DISPATCH_ATTR3F( ATTR, S,T,R ) DISPATCH_ATTRFV( ATTR, 3, &(S) ) +#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) DISPATCH_ATTRFV( ATTR, 4, &(S) ) +#else +/* Safe: + */  #define DISPATCH_ATTR2F( ATTR, S,T ) 		\  do { 						\     GLfloat v[2]; 				\ @@ -730,6 +739,7 @@ do { 						\     v[0] = S; v[1] = T; v[2] = R; v[3] = Q;	\     DISPATCH_ATTR4FV( ATTR, v );			\  } while (0) +#endif  static void enum_error( void ) diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c index 807d99952f..f47114cf62 100644 --- a/src/mesa/tnl/t_vtx_api.c +++ b/src/mesa/tnl/t_vtx_api.c @@ -883,6 +883,8 @@ void _tnl_vtx_init( GLcontext *ctx )        choose[ERROR_ATTRIB][2] = error_attrib;        choose[ERROR_ATTRIB][3] = error_attrib; +      _tnl_x86choosers(choose, do_choose); /* x86 INIT_CHOOSERS */ +        _tnl_generic_attr_table_init( generic_attr_func );     } @@ -901,7 +903,7 @@ void _tnl_vtx_init( GLcontext *ctx )     _tnl_current_init( ctx );     _tnl_exec_vtxfmt_init( ctx );     _tnl_generic_exec_vtxfmt_init( ctx ); -   _tnl_x86_exec_vtxfmt_init( ctx ); /* [dBorca] x86 DISPATCH_ATTRFV */ +   _tnl_x86_exec_vtxfmt_init( ctx ); /* x86 DISPATCH_ATTRFV */     _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt ); diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h index 46700fcd0a..f58461332e 100644 --- a/src/mesa/tnl/t_vtx_api.h +++ b/src/mesa/tnl/t_vtx_api.h @@ -80,6 +80,10 @@ extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen );  extern void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ); +extern void _tnl_x86choosers( attrfv_func (*choose)[4], +			      attrfv_func (*do_choose)( GLuint attr, +							GLuint sz )); + diff --git a/src/mesa/tnl/t_vtx_generic.c b/src/mesa/tnl/t_vtx_generic.c index 00dd2e8907..ea03dff05b 100644 --- a/src/mesa/tnl/t_vtx_generic.c +++ b/src/mesa/tnl/t_vtx_generic.c @@ -151,6 +151,15 @@ do {						\  #define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) +#ifdef USE_X86_ASM +/* Naughty cheat: + */ +#define DISPATCH_ATTR2F( ATTR, S,T ) DISPATCH_ATTRFV( ATTR, 2, &(S) ) +#define DISPATCH_ATTR3F( ATTR, S,T,R ) DISPATCH_ATTRFV( ATTR, 3, &(S) ) +#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) DISPATCH_ATTRFV( ATTR, 4, &(S) ) +#else +/* Safe: + */  #define DISPATCH_ATTR2F( ATTR, S,T ) 		\  do { 						\     GLfloat v[2]; 				\ @@ -169,6 +178,7 @@ do { 						\     v[0] = S; v[1] = T; v[2] = R; v[3] = Q;	\     DISPATCH_ATTR4FV( ATTR, v );			\  } while (0) +#endif  static void GLAPIENTRY _tnl_Vertex2f( GLfloat x, GLfloat y ) @@ -408,8 +418,8 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index,  } -/* Install the generic versions of the 2nd level dispatch functions. - * [dBorca] Some of these have a codegen alternative. +/* Install the generic versions of the 2nd level dispatch + * functions.  Some of these have a codegen alternative.   */  void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx )  { diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c index 4f04a4efbb..66950e70e1 100644 --- a/src/mesa/tnl/t_vtx_x86.c +++ b/src/mesa/tnl/t_vtx_x86.c @@ -28,6 +28,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.  /*   * Authors:   *   Keith Whitwell <keith@tungstengraphics.com> + *   Daniel Borca <dborca@yahoo.com>   */ @@ -66,6 +67,8 @@ EXTERN( _tnl_x86_dispatch_multitexcoordfv );  EXTERN( _tnl_x86_dispatch_vertexattribf );  EXTERN( _tnl_x86_dispatch_vertexattribfv ); +EXTERN( _tnl_x86_choose_fv ); +  static void notify( void )  { @@ -77,7 +80,7 @@ static void notify( void )  #define DFN( FUNC, CACHE, KEY )				\ -   struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn );          \ +   struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn );\     char *start = (char *)&FUNC;				\     char *end = (char *)&FUNC##_end;			\     int offset = 0;               			\ @@ -254,20 +257,23 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )     gen->Attribute[3] = makeX86Attribute4fv;  } -void _do_choose( void ) + +static attrfv_func +_do_choose( GLuint attr, GLuint sz )  { +   return NULL;  } -/* [dBorca] I purposely avoided one single macro, since they might need to - * be handled in different ways. Ohwell, once things get much clearer, they - * could collapse... +/* I purposely avoided one single macro, since they might need to be + * handled in different ways.  Ohwell, once things get much clearer, + * they could collapse...   */  #define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR)			\  do {									\     char *code;								\     char *start = (char *)&_tnl_x86_dispatch_attr##TYPE;			\ -   char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end;			\ +   char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end;		\     int offset = 0;							\     code = ALIGN_MALLOC( end - start, 16 );				\     memcpy (code, start, end - start);					\ @@ -279,7 +285,7 @@ do {									\  #define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR)		\  do {									\     char *code;								\ -   char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE;		\ +   char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE;	\     char *end = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE##_end;	\     int offset = 0;							\     code = ALIGN_MALLOC( end - start, 16 );				\ @@ -293,7 +299,7 @@ do {									\  do {									\     char *code;								\     char *start = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE;		\ -   char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end;		\ +   char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end;	\     int offset = 0;							\     code = ALIGN_MALLOC( end - start, 16 );				\     memcpy (code, start, end - start);					\ @@ -301,7 +307,8 @@ do {									\     vfmt->FUNC##SIZE##TYPE##NV = code;					\  } while (0) -/* [dBorca] Install the codegen'ed versions of the 2nd level dispatch + +/* Install the codegen'ed versions of the 2nd level dispatch   * functions.  We should keep a list and free them in the end...   */  void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ) @@ -312,21 +319,70 @@ void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )     MAKE_DISPATCH_ATTR(Color,3,fv,    _TNL_ATTRIB_COLOR0);     MAKE_DISPATCH_ATTR(Color,4,f,     _TNL_ATTRIB_COLOR0);     MAKE_DISPATCH_ATTR(Color,4,fv,    _TNL_ATTRIB_COLOR0); +/* vfmt->FogCoordfEXT = _tnl_FogCoordfEXT; +   vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;*/     MAKE_DISPATCH_ATTR(Normal,3,f,    _TNL_ATTRIB_NORMAL);     MAKE_DISPATCH_ATTR(Normal,3,fv,   _TNL_ATTRIB_NORMAL); +/* vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT; +   vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; */ +   MAKE_DISPATCH_ATTR(TexCoord,1,f,  _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(TexCoord,1,fv, _TNL_ATTRIB_TEX0);     MAKE_DISPATCH_ATTR(TexCoord,2,f,  _TNL_ATTRIB_TEX0);     MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(TexCoord,3,f,  _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(TexCoord,3,fv, _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(TexCoord,4,f,  _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(TexCoord,4,fv, _TNL_ATTRIB_TEX0); +   MAKE_DISPATCH_ATTR(Vertex,2,f,    _TNL_ATTRIB_POS); +   MAKE_DISPATCH_ATTR(Vertex,2,fv,   _TNL_ATTRIB_POS);     MAKE_DISPATCH_ATTR(Vertex,3,f,    _TNL_ATTRIB_POS);     MAKE_DISPATCH_ATTR(Vertex,3,fv,   _TNL_ATTRIB_POS); -   /* just add more */ +   MAKE_DISPATCH_ATTR(Vertex,4,f,    _TNL_ATTRIB_POS); +   MAKE_DISPATCH_ATTR(Vertex,4,fv,   _TNL_ATTRIB_POS); +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,f,  0); +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,fv, 0);     MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f,  0);     MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0); -   /* just add more */ +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,f,  0); +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,fv, 0); +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,f,  0); +   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,fv, 0); +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,f,  0); +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,fv, 0);     MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f,  0);     MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0); -   /* just add more */ +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,f,  0); +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,fv, 0); +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,f,  0); +   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,fv, 0); +} + + +/* Install the codegen'ed choosers. + * We should keep a list and free them in the end... + */ +void _tnl_x86choosers( attrfv_func (*choose)[4], +		       attrfv_func (*do_choose)( GLuint attr, +						 GLuint sz )) +{ +   int attr, size; + +   for (attr = 0; attr < _TNL_MAX_ATTR_CODEGEN; attr++) { +      for (size = 0; size < 4; size++) { +         char *code; +         char *start = (char *)&_tnl_x86_choose_fv; +         char *end = (char *)&_tnl_x86_choose_fv_end; +         int offset = 0; +         code = ALIGN_MALLOC( end - start, 16 ); +         memcpy (code, start, end - start); +         FIXUP(code, 0, 0, attr); +         FIXUP(code, 0, 1, size + 1); +         FIXUPREL(code, 0, 2, do_choose); +         choose[attr][size] = code; +      } +   }  }  #else  @@ -336,9 +392,19 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )     (void) gen;  } +  void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )  {     (void) ctx;  } + +void _tnl_x86choosers( attrfv_func (*choose)[4], +		       attrfv_func (*do_choose)( GLuint attr, +						 GLuint sz )) +{ +   (void) choose; +   (void) do_choose; +} +  #endif diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S index 2a2e933f97..5a1adc0f33 100644 --- a/src/mesa/tnl/t_vtx_x86_gcc.S +++ b/src/mesa/tnl/t_vtx_x86_gcc.S @@ -36,16 +36,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.  .globl x;		\  x: -#define EXTRN( x )	x -  #else  /* defined(__DJGPP__) */  #define GLOBL( x )	\  .globl _##x;		\  _##x: -#define EXTRN( x )	_##x -  #endif /* defined(__DJGPP__) */  .data @@ -55,17 +51,22 @@ _##x:  // macro to note current offsets, etc in a special region of the  // object file & just make everything work out neat.  I don't know  // enough to do that... -	 -#define SUBST( x ) (0x10101010 + x)	 -	 + +#define SUBST( x ) (0x10101010 + x) +  // [dBorca] TODO  // Unfold functions for each vertex size?  // Build super-specialized MMX/SSE versions? +// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY): +//   need separate routine for the non "fv" case, +//   to clean up the stack (I guess we could codegen +//   'ret nn' insn)! Also we need to call notify, then +//   return, instead of jump!  GLOBL ( _tnl_x86_Vertex1fv ) -	movl    4(%esp), %ecx	 +	movl	4(%esp), %ecx  	push	%edi  	push	%esi  	movl	SUBST(0), %edi	// 0x0 --> tnl->vtx.vbptr @@ -75,7 +76,7 @@ GLOBL ( _tnl_x86_Vertex1fv )  	movl	$SUBST(1), %ecx	// 0x1 --> (tnl->vtx.vertex_size - 1)  	movl	$SUBST(2), %esi	// 0x2 --> (tnl->vtx.vertex + 1)  	repz -	movsl   %ds:(%esi), %es:(%edi) +	movsl	%ds:(%esi), %es:(%edi)  	movl	%edi, SUBST(0)	// 0x0 --> tnl->vtx.vbptr  	movl	SUBST(3), %edx	// 0x3 --> counter  	pop	%esi @@ -90,7 +91,7 @@ GLOBL ( _tnl_x86_Vertex1fv_end )  .align 4  GLOBL ( _tnl_x86_Vertex2fv ) -	movl    4(%esp), %ecx	 +	movl	4(%esp), %ecx  	push	%edi  	push	%esi  	movl	SUBST(0), %edi	// load tnl->vtx.vbptr @@ -102,7 +103,7 @@ GLOBL ( _tnl_x86_Vertex2fv )  	movl	$SUBST(1), %ecx	// vertex_size - 2  	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 2  	repz -	movsl %ds:(%esi), %es:(%edi) +	movsl	%ds:(%esi), %es:(%edi)  	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr  	movl	SUBST(3), %edx	// load counter  	pop	%esi @@ -116,7 +117,7 @@ GLOBL ( _tnl_x86_Vertex2fv_end )  .align 4  GLOBL ( _tnl_x86_Vertex3fv ) -	movl    4(%esp), %ecx	 +	movl	4(%esp), %ecx  	push	%edi  	push	%esi  	movl	SUBST(0), %edi	// load tnl->vtx.vbptr @@ -130,7 +131,7 @@ GLOBL ( _tnl_x86_Vertex3fv )  	movl	$SUBST(1), %ecx	// vertex_size - 3  	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 3  	repz -	movsl %ds:(%esi), %es:(%edi) +	movsl	%ds:(%esi), %es:(%edi)  	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr  	movl	SUBST(3), %edx	// load counter  	pop	%esi @@ -142,10 +143,10 @@ GLOBL ( _tnl_x86_Vertex3fv )  	ret			// return  GLOBL ( _tnl_x86_Vertex3fv_end ) -			 +  .align 4  GLOBL ( _tnl_x86_Vertex4fv ) -	movl    4(%esp), %ecx	 +	movl	4(%esp), %ecx  	push	%edi  	push	%esi  	movl	SUBST(0), %edi	// load tnl->vtx.vbptr @@ -161,7 +162,7 @@ GLOBL ( _tnl_x86_Vertex4fv )  	movl	$SUBST(1), %ecx	// vertex_size - 4  	movl	$SUBST(2), %esi	// tnl->vtx.vertex + 3  	repz -	movsl   %ds:(%esi), %es:(%edi) +	movsl	%ds:(%esi), %es:(%edi)  	movl	%edi, SUBST(0)	// save tnl->vtx.vbptr  	movl	SUBST(3), %edx	// load counter  	pop	%esi @@ -174,49 +175,49 @@ GLOBL ( _tnl_x86_Vertex4fv )  GLOBL ( _tnl_x86_Vertex4fv_end ) -	 +  /**   * Generic handlers for vector format data.    */  GLOBL( _tnl_x86_Attribute1fv) -	movl 4(%esp), %ecx	 -	movl (%ecx), %eax       /* load v[0] */ -	movl %eax, SUBST(0)    	/* store v[0] to current vertex */ +	movl	4(%esp), %ecx +	movl	(%ecx), %eax	/* load v[0] */ +	movl	%eax, SUBST(0)	/* store v[0] to current vertex */  	ret  GLOBL ( _tnl_x86_Attribute1fv_end )  GLOBL( _tnl_x86_Attribute2fv) -	movl 4(%esp), %ecx	 -	movl (%ecx), %eax       /* load v[0] */ -	movl 4(%ecx), %edx      /* load v[1] */ -	movl %eax, SUBST(0)    	/* store v[0] to current vertex */ -	movl %edx, SUBST(1)    	/* store v[1] to current vertex */ +	movl	4(%esp), %ecx +	movl	(%ecx), %eax	/* load v[0] */ +	movl	4(%ecx), %edx	/* load v[1] */ +	movl	%eax, SUBST(0)	/* store v[0] to current vertex */ +	movl	%edx, SUBST(1)	/* store v[1] to current vertex */  	ret  GLOBL ( _tnl_x86_Attribute2fv_end )  GLOBL( _tnl_x86_Attribute3fv) -	movl 4(%esp), %ecx	 -	movl (%ecx), %eax       /* load v[0] */ -	movl 4(%ecx), %edx      /* load v[1] */ -	movl 8(%ecx), %ecx      /* load v[2] */ -	movl %eax, SUBST(0)    	/* store v[0] to current vertex */ -	movl %edx, SUBST(1)    	/* store v[1] to current vertex */ -	movl %ecx, SUBST(2)   	/* store v[2] to current vertex */ +	movl	4(%esp), %ecx +	movl	(%ecx), %eax	/* load v[0] */ +	movl	4(%ecx), %edx	/* load v[1] */ +	movl	8(%ecx), %ecx	/* load v[2] */ +	movl	%eax, SUBST(0)	/* store v[0] to current vertex */ +	movl	%edx, SUBST(1)	/* store v[1] to current vertex */ +	movl	%ecx, SUBST(2)	/* store v[2] to current vertex */  	ret  GLOBL ( _tnl_x86_Attribute3fv_end )  GLOBL( _tnl_x86_Attribute4fv) -	movl 4(%esp), %ecx	 -	movl (%ecx), %eax       /* load v[0] */ -	movl 4(%ecx), %edx      /* load v[1] */ -	movl %eax, SUBST(0)    	/* store v[0] to current vertex */ -	movl %edx, SUBST(1)    	/* store v[1] to current vertex */ -	movl 8(%ecx), %eax      /* load v[2] */ -	movl 12(%ecx), %edx     /* load v[3] */ -	movl %eax, SUBST(2)    	/* store v[2] to current vertex */ -	movl %edx, SUBST(3)    	/* store v[3] to current vertex */ +	movl	4(%esp), %ecx +	movl	(%ecx), %eax	/* load v[0] */ +	movl	4(%ecx), %edx	/* load v[1] */ +	movl	%eax, SUBST(0)	/* store v[0] to current vertex */ +	movl	%edx, SUBST(1)	/* store v[1] to current vertex */ +	movl	8(%ecx), %eax	/* load v[2] */ +	movl	12(%ecx), %edx	/* load v[3] */ +	movl	%eax, SUBST(2)	/* store v[2] to current vertex */ +	movl	%edx, SUBST(3)	/* store v[3] to current vertex */  	ret  GLOBL ( _tnl_x86_Attribute4fv_end ) @@ -225,29 +226,24 @@ GLOBL ( _tnl_x86_Attribute4fv_end )  // Must generate all of these ahead of first usage.  Generate at  // compile-time?   -	 -// NOT CURRENTLY USED  GLOBL( _tnl_x86_choose_fv)  	subl	$12, %esp	// gcc does 16 byte alignment of stack frames?  	movl	$SUBST(0), (%esp)	// arg 0 - attrib  	movl	$SUBST(1), 4(%esp)	// arg 1 - N -	call    EXTRN(_do_choose)	// new function returned in %eax -	add     $12, %esp		// tear down stack frame -	jmp     *%eax			// jump to new func -GLOBL ( _tnl_x86_choosefv_end ) -	 -	 +	.byte	0xe8			// call ... +	.long	SUBST(2)		// ... do_choose +	add	$12, %esp		// tear down stack frame +	jmp	*%eax			// jump to new func +GLOBL ( _tnl_x86_choose_fv_end ) +  // FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch. -	 -// NOT CURRENTLY USED -	 -		 +  // In the 1st level dispatch functions, switch to a different  // calling convention -- (const GLfloat *v) in %ecx.  //  @@ -256,7 +252,7 @@ GLOBL ( _tnl_x86_choosefv_end )  // back to the original caller. -	 +  // Vertex/Normal/Color, etc: the address of the function pointer  // is known at codegen time. @@ -282,6 +278,13 @@ GLOBL( _tnl_x86_dispatch_attrfv_end )  // MultiTexcoord: the address of the function pointer must be  // calculated, but can use the index argument slot to hold 'v', and  // avoid setting up a new stack frame. +// +// [dBorca] +// right, this would be the preferred approach, but gcc does not +// clean up the stack after each function call when optimizing (-fdefer-pop); +// can it make assumptions about what's already on the stack?  I dunno, +// but in this case, we can't mess with the caller's stack frame, and +// we must use a model like `_x86_dispatch_attrfv' above.  Caveat emptor!  // Also, will only need a maximum of four of each of these per context:  //  @@ -302,15 +305,16 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv )  	sall	$4, %ecx  	jmp	*SUBST(0)(%ecx)	// 0x0 - tabfv[tex0][n]  GLOBL( _tnl_x86_dispatch_multitexcoordfv_end ) -				 +  // VertexAttrib: the address of the function pointer must be  // calculated.  GLOBL( _tnl_x86_dispatch_vertexattribf ) -	movl	$16, %ecx  	movl	4(%esp), %eax  	cmpl	$16, %eax -	cmovge	%ecx, %eax	// [dBorca] BADBAD! might not be supported +	jb	.0		// "cmovge" is not supported on all CPUs +	movl	$16, %eax +.0:  	leal	8(%esp), %ecx	// calculate 'v'  	movl	%ecx, 4(%esp)	// save in 1st arg slot  	sall	$4, %eax @@ -318,13 +322,13 @@ GLOBL( _tnl_x86_dispatch_vertexattribf )  GLOBL( _tnl_x86_dispatch_vertexattribf_end )  GLOBL( _tnl_x86_dispatch_vertexattribfv ) -	movl	$16, %ecx  	movl	4(%esp), %eax  	cmpl	$16, %eax -	cmovge	%ecx, %eax	// [dBorca] BADBAD! might not be supported +	jb	.1		// "cmovge" is not supported on all CPUs +	movl	$16, %eax +.1:  	movl	8(%esp), %ecx	// load 'v'  	movl	%ecx, 4(%esp)	// save in 1st arg slot  	sall	$4, %eax  	jmp	*SUBST(0)(%eax)	// 0x0 - tabfv[0][n]  GLOBL( _tnl_x86_dispatch_vertexattribfv_end ) -  | 
