From e5c7f44009cdc9817d7026fda2a3fadbba0e65df Mon Sep 17 00:00:00 2001 From: Daniel Borca Date: Thu, 1 Apr 2004 06:53:22 +0000 Subject: codegen'ed versions of the 2nd level dispatch --- src/mesa/tnl/t_vtx_api.c | 1 + src/mesa/tnl/t_vtx_api.h | 2 ++ src/mesa/tnl/t_vtx_generic.c | 3 +- src/mesa/tnl/t_vtx_x86.c | 73 ++++++++++++++++++++++++++++++++++++++++++-- src/mesa/tnl/t_vtx_x86_gcc.S | 34 ++++++++++++++++----- 5 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c index 6dcd8b43a0..807d99952f 100644 --- a/src/mesa/tnl/t_vtx_api.c +++ b/src/mesa/tnl/t_vtx_api.c @@ -901,6 +901,7 @@ void _tnl_vtx_init( GLcontext *ctx ) _tnl_current_init( ctx ); _tnl_exec_vtxfmt_init( ctx ); _tnl_generic_exec_vtxfmt_init( ctx ); + _tnl_x86_exec_vtxfmt_init( ctx ); /* [dBorca] x86 DISPATCH_ATTRFV */ _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt ); diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h index 85cc33f071..46700fcd0a 100644 --- a/src/mesa/tnl/t_vtx_api.h +++ b/src/mesa/tnl/t_vtx_api.h @@ -78,6 +78,8 @@ extern void _tnl_generic_attr_table_init( attrfv_func (*tab)[4] ); */ extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ); +extern void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ); + diff --git a/src/mesa/tnl/t_vtx_generic.c b/src/mesa/tnl/t_vtx_generic.c index 25dd07a52c..00dd2e8907 100644 --- a/src/mesa/tnl/t_vtx_generic.c +++ b/src/mesa/tnl/t_vtx_generic.c @@ -409,8 +409,7 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index, /* Install the generic versions of the 2nd level dispatch functions. - * There's currently no codegen alternative to these, though one is in - * the works. + * [dBorca] Some of these have a codegen alternative. */ void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx ) { diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c index 6ff1a52743..3f3a198a24 100644 --- a/src/mesa/tnl/t_vtx_x86.c +++ b/src/mesa/tnl/t_vtx_x86.c @@ -59,8 +59,6 @@ EXTERN( _x86_Vertex2fv ); EXTERN( _x86_Vertex3fv ); EXTERN( _x86_Vertex4fv ); -/* None of these used yet: - */ EXTERN( _x86_dispatch_attrf ); EXTERN( _x86_dispatch_attrfv ); EXTERN( _x86_dispatch_multitexcoordf ); @@ -260,6 +258,77 @@ void _do_choose( void ) { } + +/* [dBorca] I purposely avoided one single macro, since they might need to + * be handled in different ways. Ohwell, once things get much clearer, they + * could collapse... + */ +#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR) \ +do { \ + char *code; \ + char *start = (char *)&_x86_dispatch_attr##TYPE; \ + char *end = (char *)&_x86_dispatch_attr##TYPE##_end; \ + int offset = 0; \ + code = ALIGN_MALLOC( end - start, 16 ); \ + memcpy (code, start, end - start); \ + FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\ + vfmt->FUNC##SIZE##TYPE = code; \ +} while (0) + + +#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR) \ +do { \ + char *code; \ + char *start = (char *)&_x86_dispatch_multitexcoord##TYPE; \ + char *end = (char *)&_x86_dispatch_multitexcoord##TYPE##_end; \ + int offset = 0; \ + code = ALIGN_MALLOC( end - start, 16 ); \ + memcpy (code, start, end - start); \ + FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1]));\ + vfmt->FUNC##SIZE##TYPE##ARB = code; \ +} while (0) + + +#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR) \ +do { \ + char *code; \ + char *start = (char *)&_x86_dispatch_vertexattrib##TYPE; \ + char *end = (char *)&_x86_dispatch_vertexattrib##TYPE##_end; \ + int offset = 0; \ + code = ALIGN_MALLOC( end - start, 16 ); \ + memcpy (code, start, end - start); \ + FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1])); \ + vfmt->FUNC##SIZE##TYPE##NV = code; \ +} while (0) + +/* [dBorca] Install the codegen'ed versions of the 2nd level dispatch + * functions. We should keep a list and free them in the end... + */ +void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ) +{ + GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt); + + MAKE_DISPATCH_ATTR(Color,3,f, _TNL_ATTRIB_COLOR0); + MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0); + MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0); + MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0); + MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL); + MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL); + MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS); + MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS); + /* just add more */ + + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0); + /* just add more */ + + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0); + /* just add more */ +} + #else void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ) diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S index 937b53bfd3..e932faff75 100644 --- a/src/mesa/tnl/t_vtx_x86_gcc.S +++ b/src/mesa/tnl/t_vtx_x86_gcc.S @@ -31,11 +31,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ #if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY) - + +#if !defined (__DJGPP__) + #define GLOBL( x ) \ .globl x; \ x: +#define EXTRN( x ) x + +#else /* defined(__DJGPP__) */ + +#define GLOBL( x ) \ +.globl _##x; \ +_##x: + +#define EXTRN( x ) _##x + +#endif /* defined(__DJGPP__) */ + .data .align 4 @@ -48,6 +62,10 @@ x: +// [dBorca] TODO +// Unfold functions for each vertex size? +// Build super-specialized MMX/SSE versions? + GLOBL ( _x86_Vertex1fv ) movl 4(%esp), %ecx push %edi @@ -217,9 +235,9 @@ GLOBL( _x86_choose_fv) subl $12, %esp // gcc does 16 byte alignment of stack frames? movl $SUBST(0), (%esp) // arg 0 - attrib movl $SUBST(1), 4(%esp) // arg 1 - N - call _do_choose // new function returned in %eax - add $12, %esp // tear down stack frame - jmp *%eax // jump to new func + call EXTRN(_do_choose) // new function returned in %eax + add $12, %esp // tear down stack frame + jmp *%eax // jump to new func GLOBL ( _x86_choosefv_end ) @@ -251,7 +269,7 @@ GLOBL( _x86_dispatch_attrf ) subl $12, %esp // gcc does 16 byte alignment of stack frames? leal 16(%esp), %edx // address of first float on stack movl %edx, (%esp) // save as 'v' - call SUBST(0) // 0x0 --> tabfv[attr][n] + call *SUBST(0) // 0x0 --> tabfv[attr][n] addl $12, %esp // tear down frame ret // return GLOBL( _x86_dispatch_attrf_end ) @@ -259,7 +277,7 @@ GLOBL( _x86_dispatch_attrf_end ) // The fv case is simpler: // GLOBL( _x86_dispatch_attrfv ) - jmp SUBST(0) // 0x0 --> tabfv[attr][n] + jmp *SUBST(0) // 0x0 --> tabfv[attr][n] GLOBL( _x86_dispatch_attrfv_end ) @@ -294,7 +312,7 @@ GLOBL( _x86_dispatch_vertexattribf ) movl $16, %ecx movl 4(%esp), %eax cmpl $16, %eax - cmovge %ecx, %eax + cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported leal 8(%esp), %ecx // calculate 'v' movl %ecx, 4(%esp) // save in 1st arg slot sall $4, %eax @@ -305,7 +323,7 @@ GLOBL( _x86_dispatch_vertexattribfv ) movl $16, %ecx movl 4(%esp), %eax cmpl $16, %eax - cmovge %ecx, %eax + cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported movl 8(%esp), %ecx // load 'v' movl %ecx, 4(%esp) // save in 1st arg slot sall $4, %eax -- cgit v1.2.3