From 18a74321aa825c355392f98f1563a971871794cc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 May 2005 20:25:32 +0000 Subject: Invalidate current fastpath on changes to attribute size or offset within the vertex. Use existing facilities to check for sse2 and enable when available. Turn on SSE/SSE2 codegen for t_vertex.c by default when USE_SSE_ASM is defined. Disable with "MESA_NO_CODEGEN=t". --- src/mesa/tnl/t_context.h | 2 ++ src/mesa/tnl/t_vertex.c | 10 +++++++--- src/mesa/tnl/t_vertex_sse.c | 27 +++++++++++++++++++++++---- 3 files changed, 32 insertions(+), 7 deletions(-) (limited to 'src/mesa/tnl') diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index 1d94174b7d..4988920cf2 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -570,7 +570,9 @@ struct tnl_clipspace_fastpath { struct { GLuint format; + GLuint size; GLuint stride; + GLuint offset; } *attr; tnl_emit_func func; diff --git a/src/mesa/tnl/t_vertex.c b/src/mesa/tnl/t_vertex.c index f4847f7578..bed3cf1879 100644 --- a/src/mesa/tnl/t_vertex.c +++ b/src/mesa/tnl/t_vertex.c @@ -46,7 +46,9 @@ static GLboolean match_fastpath( struct tnl_clipspace *vtx, return GL_FALSE; for (j = 0; j < vtx->attr_count; j++) - if (vtx->attr[j].format != fp->attr[j].format) + if (vtx->attr[j].format != fp->attr[j].format || + vtx->attr[j].inputsize != fp->attr[j].size || + vtx->attr[j].vertoffset != fp->attr[j].offset) return GL_FALSE; if (fp->match_strides) { @@ -90,6 +92,8 @@ void _tnl_register_fastpath( struct tnl_clipspace *vtx, for (i = 0; i < vtx->attr_count; i++) { fastpath->attr[i].format = vtx->attr[i].format; fastpath->attr[i].stride = vtx->attr[i].inputstride; + fastpath->attr[i].size = vtx->attr[i].inputsize; + fastpath->attr[i].offset = vtx->attr[i].vertoffset; } fastpath->next = vtx->fastpath; @@ -470,8 +474,8 @@ void _tnl_init_vertices( GLcontext *ctx, vtx->codegen_emit = NULL; -#ifdef __i386__ - if (getenv("MESA_EXPERIMENTAL")) +#ifdef USE_SSE_ASM + if (!_mesa_getenv("MESA_NO_CODEGEN")) vtx->codegen_emit = _tnl_generate_sse_emit; #endif } diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index d1a9f78651..d4eefdb6fc 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -33,12 +33,14 @@ #include "simple_list.h" #include "enums.h" +#if defined(USE_X86_ASM) + #define X 0 #define Y 1 #define Z 2 #define W 3 -#define DISASSEM 1 +#define DISASSEM 0 struct x86_reg { GLuint file:3; @@ -1208,18 +1210,26 @@ static GLboolean build_vertex_emit( struct x86_program *p ) return GL_TRUE; } +#include "x86/common_x86_asm.h" + + void _tnl_generate_sse_emit( GLcontext *ctx ) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); - struct x86_program p; + struct x86_program p; + + if (!cpu_has_xmm) { + vtx->codegen_emit = NULL; + return; + } memset(&p, 0, sizeof(p)); p.ctx = ctx; p.store = MALLOC(1024); - p.inputs_safe = 1; /* for now */ + p.inputs_safe = 0; /* for now */ p.outputs_safe = 1; /* for now */ - p.have_sse2 = 1; /* testing */ + p.have_sse2 = cpu_has_xmm2; p.identity = make_reg(file_XMM, 6); p.chan0 = make_reg(file_XMM, 7); @@ -1246,3 +1256,12 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) (void)sse2_packsswb; (void)sse2_pshufd; } + +#else + +void _tnl_generate_sse_emit( GLcontext *ctx ) +{ + /* Dummy version for when USE_SSE_ASM not defined */ +} + +#endif -- cgit v1.2.3