diff options
Diffstat (limited to 'src/mesa/tnl')
-rw-r--r-- | src/mesa/tnl/descrip.mms | 68 | ||||
-rw-r--r-- | src/mesa/tnl/sources | 34 | ||||
-rw-r--r-- | src/mesa/tnl/t_context.c | 50 | ||||
-rw-r--r-- | src/mesa/tnl/t_context.h | 1 | ||||
-rw-r--r-- | src/mesa/tnl/t_draw.c | 40 | ||||
-rw-r--r-- | src/mesa/tnl/t_pipeline.c | 2 | ||||
-rw-r--r-- | src/mesa/tnl/t_rasterpos.c | 32 | ||||
-rw-r--r-- | src/mesa/tnl/t_vb_fog.c | 26 | ||||
-rw-r--r-- | src/mesa/tnl/t_vb_light.c | 4 | ||||
-rw-r--r-- | src/mesa/tnl/t_vb_program.c | 324 | ||||
-rw-r--r-- | src/mesa/tnl/t_vb_texgen.c | 24 | ||||
-rw-r--r-- | src/mesa/tnl/t_vertex.c | 90 | ||||
-rw-r--r-- | src/mesa/tnl/t_vertex.h | 12 | ||||
-rw-r--r-- | src/mesa/tnl/t_vertex_generic.c | 84 | ||||
-rw-r--r-- | src/mesa/tnl/t_vertex_sse.c | 30 | ||||
-rw-r--r-- | src/mesa/tnl/t_vp_build.c | 4 | ||||
-rw-r--r-- | src/mesa/tnl/tnl.h | 5 |
17 files changed, 535 insertions, 295 deletions
diff --git a/src/mesa/tnl/descrip.mms b/src/mesa/tnl/descrip.mms new file mode 100644 index 0000000000..25dd1aecb1 --- /dev/null +++ b/src/mesa/tnl/descrip.mms @@ -0,0 +1,68 @@ +# Makefile for core library for VMS +# contributed by Jouk Jansen joukj@hrem.nano.tudelft.nl +# Last revision : 39 September 2008 + +.first + define gl [---.include.gl] + define math [-.math] + define vbo [-.vbo] + define shader [-.shader] + define swrast [-.swrast] + define array_cache [-.array_cache] + define main [-.main] + define glapi [-.glapi] + define tnl [-.tnl] + +.include [---]mms-config. + +##### MACROS ##### + +VPATH = RCS + +INCDIR = [---.include],[-.main],[-.glapi],[-.shader],[-.shader.slang] +LIBDIR = [---.lib] +CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm + +SOURCES = t_context.c t_draw.c \ + t_pipeline.c t_vb_fog.c \ + t_vb_light.c t_vb_normals.c t_vb_points.c t_vb_program.c \ + t_vb_render.c t_vb_texgen.c t_vb_texmat.c t_vb_vertex.c \ + t_vertex.c t_rasterpos.c\ + t_vertex_generic.c t_vp_build.c + +OBJECTS = t_context.obj,t_draw.obj,\ + t_pipeline.obj,t_vb_fog.obj,t_vb_light.obj,t_vb_normals.obj,\ + t_vb_points.obj,t_vb_program.obj,t_vb_render.obj,t_vb_texgen.obj,\ + t_vb_texmat.obj,t_vb_vertex.obj,t_rasterpos.obj,\ + t_vertex.obj,t_vertex_generic.obj,\ + t_vp_build.obj + +##### RULES ##### + +VERSION=Mesa V3.4 + +##### TARGETS ##### +# Make the library +$(LIBDIR)$(GL_LIB) : $(OBJECTS) + @ library $(LIBDIR)$(GL_LIB) $(OBJECTS) + +clean : + purge + delete *.obj;* + +t_context.obj : t_context.c +t_draw.obj : t_draw.c +t_pipeline.obj : t_pipeline.c +t_vb_fog.obj : t_vb_fog.c +t_vb_light.obj : t_vb_light.c +t_vb_normals.obj : t_vb_normals.c +t_vb_points.obj : t_vb_points.c +t_vb_program.obj : t_vb_program.c +t_vb_render.obj : t_vb_render.c +t_vb_texgen.obj : t_vb_texgen.c +t_vb_texmat.obj : t_vb_texmat.c +t_vb_vertex.obj : t_vb_vertex.c +t_vertex.obj : t_vertex.c +t_vertex_generic.obj : t_vertex_generic.c +t_vp_build.obj : t_vp_build.c +t_rasterpos.obj : t_rasterpos.c diff --git a/src/mesa/tnl/sources b/src/mesa/tnl/sources deleted file mode 100644 index a0888be11d..0000000000 --- a/src/mesa/tnl/sources +++ /dev/null @@ -1,34 +0,0 @@ -# List of source files in this directory used for X.org xserver build -MESA_TNL_SOURCES = \ -t_context.c \ -t_pipeline.c \ -t_vb_arbprogram.c \ -t_vb_arbprogram_sse.c \ -t_vb_arbshader.c \ -t_vb_cull.c \ -t_vb_fog.c \ -t_vb_light.c \ -t_vb_normals.c \ -t_vb_points.c \ -t_vb_program.c \ -t_vb_render.c \ -t_vb_texgen.c \ -t_vb_texmat.c \ -t_vb_vertex.c \ -t_vertex.c \ -t_vertex_generic.c \ -t_vertex_sse.c \ -t_vp_build.c - -MESA_TNL_HEADERS = \ -t_array_api.h \ -t_array_import.h \ -t_context.h \ -t_pipeline.h \ -t_vb_arbprogram.h \ -t_vb_cliptmp.h \ -t_vb_lighttmp.h \ -t_vb_rendertmp.h \ -t_vertex.h \ -t_vp_build.h \ -tnl.h diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 0ace5c2d6f..f69b122046 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5.2 + * Version: 7.2 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,6 +32,8 @@ #include "main/macros.h" #include "main/mtypes.h" #include "main/light.h" +#include "math/m_translate.h" +#include "math/m_xform.h" #include "tnl.h" #include "t_context.h" @@ -81,6 +83,9 @@ _tnl_CreateContext( GLcontext *ctx ) /* plug in the VBO drawing function */ vbo_set_draw_func(ctx, _tnl_draw_prims); + _math_init_transformation(); + _math_init_translate(); + return GL_TRUE; } @@ -104,43 +109,52 @@ _tnl_InvalidateState( GLcontext *ctx, GLuint new_state ) const struct gl_vertex_program *vp = ctx->VertexProgram._Current; const struct gl_fragment_program *fp = ctx->FragmentProgram._Current; - if (new_state & (_NEW_HINT)) { + if (new_state & (_NEW_HINT | _NEW_PROGRAM)) { ASSERT(tnl->AllowVertexFog || tnl->AllowPixelFog); - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !fp; } tnl->pipeline.new_state |= new_state; - /* Calculate tnl->render_inputs: + /* Calculate tnl->render_inputs. This bitmask indicates which vertex + * attributes need to be emitted to the rasterizer. */ if (ctx->Visual.rgbMode) { GLuint i; RENDERINPUTS_ZERO( tnl->render_inputs_bitset ); RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS ); + if (!fp || (fp->Base.InputsRead & FRAG_BIT_COL0)) { RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0 ); } + + if (NEED_SECONDARY_COLOR(ctx)) + RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 ); + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { - if (ctx->Texture._EnabledCoordUnits & (1 << i)) { + if (ctx->Texture._EnabledCoordUnits & (1 << i) || + (fp && fp->Base.InputsRead & FRAG_BIT_TEX(i))) { RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) ); } } - - if (NEED_SECONDARY_COLOR(ctx)) - RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 ); } else { RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS ); RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR_INDEX ); } - if (ctx->Fog.Enabled || - ((ctx->FragmentProgram._Active || ctx->FragmentProgram._Current) && - (ctx->FragmentProgram._Current->FogOption != GL_NONE || - (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_FOGC)))) + if (ctx->Fog.Enabled) { + /* fixed-function fog */ RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_FOG ); + } + else if (fp) { + if (fp->FogOption != GL_NONE || (fp->Base.InputsRead & FRAG_BIT_FOGC)) { + /* fragment program needs fog coord */ + RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_FOG ); + } + } if (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL) @@ -201,8 +215,8 @@ _tnl_allow_vertex_fog( GLcontext *ctx, GLboolean value ) { TNLcontext *tnl = TNL_CONTEXT(ctx); tnl->AllowVertexFog = value; - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !ctx->FragmentProgram._Current; } @@ -211,7 +225,7 @@ _tnl_allow_pixel_fog( GLcontext *ctx, GLboolean value ) { TNLcontext *tnl = TNL_CONTEXT(ctx); tnl->AllowPixelFog = value; - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !ctx->FragmentProgram._Current; } diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index 0a6ce04614..c19eb3df3c 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -50,6 +50,7 @@ #define _T_CONTEXT_H #include "main/glheader.h" +#include "main/bitset.h" #include "main/mtypes.h" #include "math/m_matrix.h" diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index a1a46f6b82..2ec65d5323 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -1,9 +1,8 @@ - /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.1 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,7 +28,7 @@ #include "main/glheader.h" #include "main/context.h" #include "main/imports.h" -#include "state.h" +#include "main/state.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/enums.h" @@ -89,6 +88,29 @@ static void free_space(GLcontext *ctx) } while (0) +/** + * Convert array of BGRA/GLubyte[4] values to RGBA/float[4] + * \param ptr input/ubyte array + * \param fptr output/float array + */ +static void +convert_bgra_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count ) +{ + GLuint i; + assert(input->Normalized); + assert(input->Size == 4); + for (i = 0; i < count; i++) { + const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ + *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ + *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */ + *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */ + *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */ + ptr += input->StrideB; + } +} + /* Adjust pointer to point at first requested element, convert to * floating point, populate VB->AttribPtr[]. @@ -113,7 +135,13 @@ static void _tnl_import_array( GLcontext *ctx, CONVERT(GLbyte, BYTE_TO_FLOAT); break; case GL_UNSIGNED_BYTE: - CONVERT(GLubyte, UBYTE_TO_FLOAT); + if (input->Format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + convert_bgra_to_float(input, ptr, fptr, count); + } + else { + CONVERT(GLubyte, UBYTE_TO_FLOAT); + } break; case GL_SHORT: CONVERT(GLshort, SHORT_TO_FLOAT); @@ -368,7 +396,7 @@ void _tnl_draw_prims( GLcontext *ctx, _tnl_draw_prims ); return; } - else if (max_index >= max) { + else if (max_index > max) { /* The software TNL pipeline has a fixed amount of storage for * vertices and it is necessary to split incoming drawing commands * if they exceed that limit. diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 2a0ed8852a..357ef1e24b 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -199,11 +199,11 @@ const struct tnl_pipeline_stage *_tnl_default_pipeline[] = { &_tnl_vertex_transform_stage, &_tnl_normal_transform_stage, &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, &_tnl_texgen_stage, &_tnl_texture_transform_stage, &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, + &_tnl_fog_coordinate_stage, &_tnl_render_stage, NULL }; diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index dfbe0f1806..04fb1d8f8c 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -26,11 +26,11 @@ #include "main/glheader.h" #include "main/colormac.h" #include "main/context.h" -#include "feedback.h" -#include "light.h" +#include "main/feedback.h" +#include "main/light.h" #include "main/macros.h" -#include "rastpos.h" -#include "simple_list.h" +#include "main/rastpos.h" +#include "main/simple_list.h" #include "main/mtypes.h" #include "math/m_matrix.h" @@ -301,12 +301,12 @@ compute_texgen(GLcontext *ctx, const GLfloat vObj[4], const GLfloat vEye[4], mInv = 0.0F; if (texUnit->TexGenEnabled & S_BIT) { - switch (texUnit->GenModeS) { + switch (texUnit->GenS.Mode) { case GL_OBJECT_LINEAR: - texcoord[0] = DOT4(vObj, texUnit->ObjectPlaneS); + texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane); break; case GL_EYE_LINEAR: - texcoord[0] = DOT4(vEye, texUnit->EyePlaneS); + texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane); break; case GL_SPHERE_MAP: texcoord[0] = rx * mInv + 0.5F; @@ -324,12 +324,12 @@ compute_texgen(GLcontext *ctx, const GLfloat vObj[4], const GLfloat vEye[4], } if (texUnit->TexGenEnabled & T_BIT) { - switch (texUnit->GenModeT) { + switch (texUnit->GenT.Mode) { case GL_OBJECT_LINEAR: - texcoord[1] = DOT4(vObj, texUnit->ObjectPlaneT); + texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane); break; case GL_EYE_LINEAR: - texcoord[1] = DOT4(vEye, texUnit->EyePlaneT); + texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane); break; case GL_SPHERE_MAP: texcoord[1] = ry * mInv + 0.5F; @@ -347,12 +347,12 @@ compute_texgen(GLcontext *ctx, const GLfloat vObj[4], const GLfloat vEye[4], } if (texUnit->TexGenEnabled & R_BIT) { - switch (texUnit->GenModeR) { + switch (texUnit->GenR.Mode) { case GL_OBJECT_LINEAR: - texcoord[2] = DOT4(vObj, texUnit->ObjectPlaneR); + texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane); break; case GL_EYE_LINEAR: - texcoord[2] = DOT4(vEye, texUnit->EyePlaneR); + texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane); break; case GL_REFLECTION_MAP: texcoord[2] = rz; @@ -367,12 +367,12 @@ compute_texgen(GLcontext *ctx, const GLfloat vObj[4], const GLfloat vEye[4], } if (texUnit->TexGenEnabled & Q_BIT) { - switch (texUnit->GenModeQ) { + switch (texUnit->GenQ.Mode) { case GL_OBJECT_LINEAR: - texcoord[3] = DOT4(vObj, texUnit->ObjectPlaneQ); + texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane); break; case GL_EYE_LINEAR: - texcoord[3] = DOT4(vEye, texUnit->EyePlaneQ); + texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane); break; default: _mesa_problem(ctx, "Bad Q texgen in compute_texgen()"); diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c index 00c0979f3f..f3a7bd49f4 100644 --- a/src/mesa/tnl/t_vb_fog.c +++ b/src/mesa/tnl/t_vb_fog.c @@ -41,7 +41,6 @@ struct fog_stage_data { GLvector4f fogcoord; /* has actual storage allocated */ - GLvector4f input; /* points into VB->EyePtr Z values */ }; #define FOG_STAGE_DATA(stage) ((struct fog_stage_data *)stage->privatePtr) @@ -91,7 +90,8 @@ init_static_data( void ) * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function. * Fog coordinates are distances from the eye (typically between the * near and far clip plane distances). - * Note the fog (eye Z) coords may be negative so we use ABS(z) below. + * Note that fogcoords may be negative, if eye z is source absolute + * value must be taken earlier. * Fog blend factors are in the range [0,1]. */ static void @@ -148,11 +148,11 @@ run_fog_stage(GLcontext *ctx, struct tnl_pipeline_stage *stage) struct fog_stage_data *store = FOG_STAGE_DATA(stage); GLvector4f *input; - if (!ctx->Fog.Enabled || ctx->VertexProgram._Current) - return GL_TRUE; + if (!ctx->Fog.Enabled) + return GL_TRUE; - if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { + if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT && !ctx->VertexProgram._Current) { GLuint i; GLfloat *coord; /* Fog is computed from vertex or fragment Z values */ @@ -169,13 +169,10 @@ run_fog_stage(GLcontext *ctx, struct tnl_pipeline_stage *stage) */ input = &store->fogcoord; - /* NOTE: negate plane here so we get positive fog coords! */ - /* NOTE2: this doesn't always work (tests/fog - all frag depth fog - coords will be negative). */ - plane[0] = -m[2]; - plane[1] = -m[6]; - plane[2] = -m[10]; - plane[3] = -m[14]; + plane[0] = m[2]; + plane[1] = m[6]; + plane[2] = m[10]; + plane[3] = m[14]; /* Full eye coords weren't required, just calculate the * eye Z values. */ @@ -189,12 +186,12 @@ run_fog_stage(GLcontext *ctx, struct tnl_pipeline_stage *stage) NOTE should avoid going through array twice */ coord = input->start; for (i = 0; i < input->count; i++) { - input->data[i][0] = FABSF(*coord); + *coord = FABSF(*coord); STRIDE_F(coord, input->stride); } } else { - /* fog coordinates = eye Z coordinates (use ABS later) */ + /* fog coordinates = eye Z coordinates - need to copy for ABS */ input = &store->fogcoord; if (VB->EyePtr->size < 2) @@ -249,7 +246,6 @@ alloc_fog_data(GLcontext *ctx, struct tnl_pipeline_stage *stage) return GL_FALSE; _mesa_vector4f_alloc( &store->fogcoord, 0, tnl->vb.Size, 32 ); - _mesa_vector4f_init( &store->input, 0, NULL ); if (!inited) init_static_data(); diff --git a/src/mesa/tnl/t_vb_light.c b/src/mesa/tnl/t_vb_light.c index ebd3412d20..f47f99397c 100644 --- a/src/mesa/tnl/t_vb_light.c +++ b/src/mesa/tnl/t_vb_light.c @@ -26,10 +26,10 @@ #include "main/glheader.h" #include "main/colormac.h" -#include "light.h" +#include "main/light.h" #include "main/macros.h" #include "main/imports.h" -#include "simple_list.h" +#include "main/simple_list.h" #include "main/mtypes.h" #include "math/m_translate.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index c778f5d248..f99401ca6d 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5.3 + * Version: 7.1 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -41,9 +41,129 @@ #include "swrast/s_context.h" #include "swrast/s_texfilter.h" -#include "tnl.h" -#include "t_context.h" -#include "t_pipeline.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + + + +/*! + * Private storage for the vertex program pipeline stage. + */ +struct vp_stage_data { + /** The results of running the vertex program go into these arrays. */ + GLvector4f results[VERT_RESULT_MAX]; + + GLvector4f ndcCoords; /**< normalized device coords */ + GLubyte *clipmask; /**< clip flags */ + GLubyte ormask, andmask; /**< for clipping */ +}; + + +#define VP_STAGE_DATA(stage) ((struct vp_stage_data *)(stage->privatePtr)) + + +static void +userclip( GLcontext *ctx, + GLvector4f *clip, + GLubyte *clipmask, + GLubyte *clipormask, + GLubyte *clipandmask ) +{ + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLuint nr, i; + const GLfloat a = ctx->Transform._ClipUserPlane[p][0]; + const GLfloat b = ctx->Transform._ClipUserPlane[p][1]; + const GLfloat c = ctx->Transform._ClipUserPlane[p][2]; + const GLfloat d = ctx->Transform._ClipUserPlane[p][3]; + GLfloat *coord = (GLfloat *)clip->data; + GLuint stride = clip->stride; + GLuint count = clip->count; + + for (nr = 0, i = 0 ; i < count ; i++) { + GLfloat dp = (coord[0] * a + + coord[1] * b + + coord[2] * c + + coord[3] * d); + + if (dp < 0) { + nr++; + clipmask[i] |= CLIP_USER_BIT; + } + + STRIDE_F(coord, stride); + } + + if (nr > 0) { + *clipormask |= CLIP_USER_BIT; + if (nr == count) { + *clipandmask |= CLIP_USER_BIT; + return; + } + } + } + } +} + + +static GLboolean +do_ndc_cliptest(GLcontext *ctx, struct vp_stage_data *store) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + /* Cliptest and perspective divide. Clip functions must clear + * the clipmask. + */ + store->ormask = 0; + store->andmask = CLIP_FRUSTUM_BITS; + + if (tnl->NeedNdcCoords) { + VB->NdcPtr = + _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr, + &store->ndcCoords, + store->clipmask, + &store->ormask, + &store->andmask ); + } + else { + VB->NdcPtr = NULL; + _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr, + NULL, + store->clipmask, + &store->ormask, + &store->andmask ); + } + + if (store->andmask) { + /* All vertices are outside the frustum */ + return GL_FALSE; + } + + /* Test userclip planes. This contributes to VB->ClipMask. + */ + /** XXX NEW_SLANG _Enabled ??? */ + if (ctx->Transform.ClipPlanesEnabled && (!ctx->VertexProgram._Enabled || + ctx->VertexProgram.Current->IsPositionInvariant)) { + userclip( ctx, + VB->ClipPtr, + store->clipmask, + &store->ormask, + &store->andmask ); + + if (store->andmask) { + return GL_FALSE; + } + } + + VB->ClipAndMask = store->andmask; + VB->ClipOrMask = store->ormask; + VB->ClipMask = store->clipmask; + + return GL_TRUE; +} /** @@ -52,7 +172,6 @@ * real dependencies on the rest of swrast. It should probably be * moved into main/ someday. */ - static void vp_fetch_texel(GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda, GLuint unit, GLfloat color[4]) @@ -85,22 +204,6 @@ _tnl_program_string(GLcontext *ctx, GLenum target, struct gl_program *program) } -/*! - * Private storage for the vertex program pipeline stage. - */ -struct vp_stage_data { - /** The results of running the vertex program go into these arrays. */ - GLvector4f results[VERT_RESULT_MAX]; - - GLvector4f ndcCoords; /**< normalized device coords */ - GLubyte *clipmask; /**< clip flags */ - GLubyte ormask, andmask; /**< for clipping */ -}; - - -#define VP_STAGE_DATA(stage) ((struct vp_stage_data *)(stage->privatePtr)) - - /** * Initialize virtual machine state prior to executing vertex program. */ @@ -139,96 +242,50 @@ init_machine(GLcontext *ctx, struct gl_program_machine *machine) machine->FetchTexelLod = vp_fetch_texel; machine->FetchTexelDeriv = NULL; /* not used by vertex programs */ + + machine->Samplers = ctx->VertexProgram._Current->Base.SamplerUnits; } /** - * Copy the 16 elements of a matrix into four consecutive program - * registers starting at 'pos'. + * Map the texture images which the vertex program will access (if any). */ static void -load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16]) +map_textures(GLcontext *ctx, const struct gl_vertex_program *vp) { - GLuint i; - for (i = 0; i < 4; i++) { - registers[pos + i][0] = mat[0 + i]; - registers[pos + i][1] = mat[4 + i]; - registers[pos + i][2] = mat[8 + i]; - registers[pos + i][3] = mat[12 + i]; - } -} + GLuint u; + if (!ctx->Driver.MapTexture) + return; -/** - * As above, but transpose the matrix. - */ -static void -load_transpose_matrix(GLfloat registers[][4], GLuint pos, - const GLfloat mat[16]) -{ - MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat)); + for (u = 0; u < ctx->Const.MaxVertexTextureImageUnits; u++) { + if (vp->Base.TexturesUsed[u]) { + /* Note: _Current *should* correspond to the target indicated + * in TexturesUsed[u]. + */ + ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[u]._Current); + } + } } /** - * Load current vertex program's parameter registers with tracked - * matrices (if NV program). This only needs to be done per - * glBegin/glEnd, not per-vertex. + * Unmap the texture images which were used by the vertex program (if any). */ -void -_mesa_load_tracked_matrices(GLcontext *ctx) +static void +unmap_textures(GLcontext *ctx, const struct gl_vertex_program *vp) { - GLuint i; + GLuint u; - for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) { - /* point 'mat' at source matrix */ - GLmatrix *mat; - if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) { - mat = ctx->ModelviewMatrixStack.Top; - } - else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) { - mat = ctx->ProjectionMatrixStack.Top; - } - else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) { - mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top; - } - else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) { - mat = ctx->ColorMatrixStack.Top; - } - else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) { - /* XXX verify the combined matrix is up to date */ - mat = &ctx->_ModelProjectMatrix; - } - else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV && - ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) { - GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV; - ASSERT(n < MAX_PROGRAM_MATRICES); - mat = ctx->ProgramMatrixStack[n].Top; - } - else { - /* no matrix is tracked, but we leave the register values as-is */ - assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE); - continue; - } + if (!ctx->Driver.MapTexture) + return; - /* load the matrix values into sequential registers */ - if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) { - load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m); - } - else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) { - _math_matrix_analyse(mat); /* update the inverse */ - ASSERT(!_math_matrix_is_dirty(mat)); - load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv); - } - else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) { - load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m); - } - else { - assert(ctx->VertexProgram.TrackMatrixTransform[i] - == GL_INVERSE_TRANSPOSE_NV); - _math_matrix_analyse(mat); /* update the inverse */ - ASSERT(!_math_matrix_is_dirty(mat)); - load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv); + for (u = 0; u < ctx->Const.MaxVertexTextureImageUnits; u++) { + if (vp->Base.TexturesUsed[u]) { + /* Note: _Current *should* correspond to the target indicated + * in TexturesUsed[u]. + */ + ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[u]._Current); } } } @@ -259,6 +316,7 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) _mesa_load_state_parameters(ctx, program->Base.Parameters); } + /* make list of outputs to save some time below */ numOutputs = 0; for (i = 0; i < VERT_RESULT_MAX; i++) { if (program->Base.OutputsWritten & (1 << i)) { @@ -266,6 +324,8 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) } } + map_textures(ctx, program); + for (i = 0; i < VB->Count; i++) { GLuint attr; @@ -317,6 +377,8 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) #endif } + unmap_textures(ctx, program); + /* Fixup fog and point size results if needed */ if (program->IsNVProgram) { if (ctx->Fog.Enabled && @@ -334,12 +396,39 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) } } - /* Setup the VB pointers so that the next pipeline stages get - * their data from the right place (the program output arrays). - */ - VB->ClipPtr = &store->results[VERT_RESULT_HPOS]; - VB->ClipPtr->size = 4; - VB->ClipPtr->count = VB->Count; + if (program->IsPositionInvariant) { + /* We need the exact same transform as in the fixed function path here + * to guarantee invariance, depending on compiler optimization flags + * results could be different otherwise. + */ + VB->ClipPtr = TransformRaw( &store->results[0], + &ctx->_ModelProjectMatrix, + VB->AttribPtr[0] ); + + /* Drivers expect this to be clean to element 4... + */ + switch (VB->ClipPtr->size) { + case 1: + /* impossible */ + case 2: + _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 ); + /* fall-through */ + case 3: + _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 ); + /* fall-through */ + case 4: + break; + } + } + else { + /* Setup the VB pointers so that the next pipeline stages get + * their data from the right place (the program output arrays). + */ + VB->ClipPtr = &store->results[VERT_RESULT_HPOS]; + VB->ClipPtr->size = 4; + VB->ClipPtr->count = VB->Count; + } + VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0]; VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0]; VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1]; @@ -365,41 +454,10 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) } } - /* Cliptest and perspective divide. Clip functions must clear - * the clipmask. - */ - store->ormask = 0; - store->andmask = CLIP_FRUSTUM_BITS; - - if (tnl->NeedNdcCoords) { - VB->NdcPtr = - _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr, - &store->ndcCoords, - store->clipmask, - &store->ormask, - &store->andmask ); - } - else { - VB->NdcPtr = NULL; - _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr, - NULL, - store->clipmask, - &store->ormask, - &store->andmask ); - } - - if (store->andmask) /* All vertices are outside the frustum */ - return GL_FALSE; - - /* This is where we'd do clip testing against the user-defined - * clipping planes, but they're not supported by vertex programs. + /* Perform NDC and cliptest operations: */ - - VB->ClipOrMask = store->ormask; - VB->ClipMask = store->clipmask; - - return GL_TRUE; + return do_ndc_cliptest(ctx, store); } diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c index 14d3876e54..7c1819b223 100644 --- a/src/mesa/tnl/t_vb_texgen.c +++ b/src/mesa/tnl/t_vb_texgen.c @@ -367,16 +367,16 @@ static void texgen( GLcontext *ctx, if (texUnit->TexGenEnabled & S_BIT) { GLuint i; - switch (texUnit->GenModeS) { + switch (texUnit->GenS.Mode) { case GL_OBJECT_LINEAR: _mesa_dotprod_tab[obj->size]( (GLfloat *)out->data, sizeof(out->data[0]), obj, - texUnit->ObjectPlaneS ); + texUnit->GenS.ObjectPlane ); break; case GL_EYE_LINEAR: _mesa_dotprod_tab[eye->size]( (GLfloat *)out->data, sizeof(out->data[0]), eye, - texUnit->EyePlaneS ); + texUnit->GenS.EyePlane ); break; case GL_SPHERE_MAP: for (i = 0; i < count; i++) @@ -400,16 +400,16 @@ static void texgen( GLcontext *ctx, if (texUnit->TexGenEnabled & T_BIT) { GLuint i; - switch (texUnit->GenModeT) { + switch (texUnit->GenT.Mode) { case GL_OBJECT_LINEAR: _mesa_dotprod_tab[obj->size]( &(out->data[0][1]), sizeof(out->data[0]), obj, - texUnit->ObjectPlaneT ); + texUnit->GenT.ObjectPlane ); break; case GL_EYE_LINEAR: _mesa_dotprod_tab[eye->size]( &(out->data[0][1]), sizeof(out->data[0]), eye, - texUnit->EyePlaneT ); + texUnit->GenT.EyePlane ); break; case GL_SPHERE_MAP: for (i = 0; i < count; i++) @@ -433,16 +433,16 @@ static void texgen( GLcontext *ctx, if (texUnit->TexGenEnabled & R_BIT) { GLuint i; - switch (texUnit->GenModeR) { + switch (texUnit->GenR.Mode) { case GL_OBJECT_LINEAR: _mesa_dotprod_tab[obj->size]( &(out->data[0][2]), sizeof(out->data[0]), obj, - texUnit->ObjectPlaneR ); + texUnit->GenR.ObjectPlane ); break; case GL_EYE_LINEAR: _mesa_dotprod_tab[eye->size]( &(out->data[0][2]), sizeof(out->data[0]), eye, - texUnit->EyePlaneR ); + texUnit->GenR.EyePlane ); break; case GL_REFLECTION_MAP_NV: for (i=0;i<count;i++) @@ -461,16 +461,16 @@ static void texgen( GLcontext *ctx, } if (texUnit->TexGenEnabled & Q_BIT) { - switch (texUnit->GenModeQ) { + switch (texUnit->GenQ.Mode) { case GL_OBJECT_LINEAR: _mesa_dotprod_tab[obj->size]( &(out->data[0][3]), sizeof(out->data[0]), obj, - texUnit->ObjectPlaneQ ); + texUnit->GenQ.ObjectPlane ); break; case GL_EYE_LINEAR: _mesa_dotprod_tab[eye->size]( &(out->data[0][3]), sizeof(out->data[0]), eye, - texUnit->EyePlaneQ ); + texUnit->GenQ.EyePlane ); break; default: _mesa_problem(ctx, "Bad Q texgen"); diff --git a/src/mesa/tnl/t_vertex.c b/src/mesa/tnl/t_vertex.c index b661524c87..fe4209ae57 100644 --- a/src/mesa/tnl/t_vertex.c +++ b/src/mesa/tnl/t_vertex.c @@ -376,6 +376,22 @@ void _tnl_notify_pipeline_output_change( GLcontext *ctx ) invalidate_funcs(vtx); } + +static void adjust_input_ptrs( GLcontext *ctx, GLint diff) +{ + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); + struct tnl_clipspace_attr *a = vtx->attr; + const GLuint count = vtx->attr_count; + int j; + + diff -= 1; + for (j=0; j<count; ++j) { + register GLvector4f *vptr = VB->AttribPtr[a->attrib]; + (a++)->inputptr += diff*vptr->stride; + } +} + static void update_input_ptrs( GLcontext *ctx, GLuint start ) { struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; @@ -431,13 +447,40 @@ void *_tnl_emit_vertices_to_buffer( GLcontext *ctx, struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); update_input_ptrs(ctx, start); - /* Note: dest should not be adjusted for non-zero 'start' values: */ vtx->emit( ctx, end - start, (GLubyte*) dest ); return (void *)((GLubyte *)dest + vtx->vertex_size * (end - start)); } +/* Emit indexed VB vertices start..end to dest. Note that VB vertex at + * postion start will be emitted to dest at position zero. + */ + +void *_tnl_emit_indexed_vertices_to_buffer( GLcontext *ctx, + const GLuint *elts, + GLuint start, + GLuint end, + void *dest ) +{ + struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); + GLuint oldIndex; + GLubyte *cdest = dest; + + update_input_ptrs(ctx, oldIndex = elts[start++]); + vtx->emit( ctx, 1, cdest ); + cdest += vtx->vertex_size; + + for (; start < end; ++start) { + adjust_input_ptrs(ctx, elts[start] - oldIndex); + oldIndex = elts[start]; + vtx->emit( ctx, 1, cdest); + cdest += vtx->vertex_size; + } + + return (void *) cdest; +} + void _tnl_init_vertices( GLcontext *ctx, GLuint vb_size, @@ -492,27 +535,30 @@ void _tnl_init_vertices( GLcontext *ctx, void _tnl_free_vertices( GLcontext *ctx ) { - struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); - struct tnl_clipspace_fastpath *fp, *tmp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + if (tnl) { + struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); + struct tnl_clipspace_fastpath *fp, *tmp; - if (vtx->vertex_buf) { - ALIGN_FREE(vtx->vertex_buf); - vtx->vertex_buf = NULL; - } - - for (fp = vtx->fastpath ; fp ; fp = tmp) { - tmp = fp->next; - FREE(fp->attr); - - /* KW: At the moment, fp->func is constrained to be allocated by - * _mesa_exec_alloc(), as the hardwired fastpaths in - * t_vertex_generic.c are handled specially. It would be nice - * to unify them, but this probably won't change until this - * module gets another overhaul. - */ - _mesa_exec_free((void *) fp->func); - FREE(fp); + if (vtx->vertex_buf) { + ALIGN_FREE(vtx->vertex_buf); + vtx->vertex_buf = NULL; + } + + for (fp = vtx->fastpath ; fp ; fp = tmp) { + tmp = fp->next; + FREE(fp->attr); + + /* KW: At the moment, fp->func is constrained to be allocated by + * _mesa_exec_alloc(), as the hardwired fastpaths in + * t_vertex_generic.c are handled specially. It would be nice + * to unify them, but this probably won't change until this + * module gets another overhaul. + */ + _mesa_exec_free((void *) fp->func); + FREE(fp); + } + + vtx->fastpath = NULL; } - - vtx->fastpath = NULL; } diff --git a/src/mesa/tnl/t_vertex.h b/src/mesa/tnl/t_vertex.h index 712311a146..2dfd7b57f0 100644 --- a/src/mesa/tnl/t_vertex.h +++ b/src/mesa/tnl/t_vertex.h @@ -119,6 +119,18 @@ extern void *_tnl_emit_vertices_to_buffer( GLcontext *ctx, GLuint end, void *dest ); +/* This function isn't optimal. Check out + * gallium/auxilary/translate for a more comprehensive implementation of + * the same functionality. + */ + +extern void *_tnl_emit_indexed_vertices_to_buffer( GLcontext *ctx, + const GLuint *elts, + GLuint start, + GLuint end, + void *dest ); + + extern void _tnl_build_vertices( GLcontext *ctx, GLuint start, GLuint end, diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index c399423b9e..9812f8c808 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -29,11 +29,17 @@ #include "main/glheader.h" #include "main/context.h" #include "main/colormac.h" +#include "main/simple_list.h" #include "t_context.h" #include "t_vertex.h" -#include "simple_list.h" +#if 0 +#define DEBUG_INSERT printf("%s\n", __FUNCTION__) +#else +#define DEBUG_INSERT +#endif + /* * These functions take the NDC coordinates pointed to by 'in', apply the @@ -45,7 +51,7 @@ static INLINE void insert_4f_viewport_4( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; out[2] = vp[10] * in[2] + vp[14]; @@ -57,7 +63,7 @@ static INLINE void insert_4f_viewport_3( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; out[2] = vp[10] * in[2] + vp[14]; @@ -69,7 +75,7 @@ static INLINE void insert_4f_viewport_2( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; out[2] = vp[14]; @@ -81,7 +87,7 @@ static INLINE void insert_4f_viewport_1( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[13]; out[2] = vp[14]; @@ -93,7 +99,7 @@ static INLINE void insert_3f_viewport_3( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; out[2] = vp[10] * in[2] + vp[14]; @@ -104,10 +110,10 @@ static INLINE void insert_3f_viewport_2( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; - out[2] = vp[10] * in[2] + vp[14]; + out[2] = vp[14]; } static INLINE void insert_3f_viewport_1( const struct tnl_clipspace_attr *a, GLubyte *v, @@ -115,7 +121,7 @@ static INLINE void insert_3f_viewport_1( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[13]; out[2] = vp[14]; @@ -126,7 +132,7 @@ static INLINE void insert_2f_viewport_2( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; } @@ -136,7 +142,7 @@ static INLINE void insert_2f_viewport_1( const struct tnl_clipspace_attr *a, GLu { GLfloat *out = (GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[13]; } @@ -150,7 +156,7 @@ static INLINE void insert_4f_4( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; @@ -161,7 +167,7 @@ static INLINE void insert_4f_3( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; @@ -172,7 +178,7 @@ static INLINE void insert_4f_2( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = 0; @@ -183,7 +189,7 @@ static INLINE void insert_4f_1( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = 0; out[2] = 0; @@ -194,7 +200,7 @@ static INLINE void insert_3f_xyw_4( const struct tnl_clipspace_attr *a, GLubyte { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = in[3]; @@ -203,6 +209,7 @@ static INLINE void insert_3f_xyw_4( const struct tnl_clipspace_attr *a, GLubyte static INLINE void insert_3f_xyw_err( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { (void) a; (void) v; (void) in; + DEBUG_INSERT; _mesa_exit(1); } @@ -210,7 +217,7 @@ static INLINE void insert_3f_3( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; @@ -220,7 +227,7 @@ static INLINE void insert_3f_2( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; out[2] = 0; @@ -230,7 +237,7 @@ static INLINE void insert_3f_1( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = 0; out[2] = 0; @@ -241,7 +248,7 @@ static INLINE void insert_2f_2( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = in[1]; } @@ -250,7 +257,7 @@ static INLINE void insert_2f_1( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; out[1] = 0; } @@ -259,12 +266,13 @@ static INLINE void insert_1f_1( const struct tnl_clipspace_attr *a, GLubyte *v, { GLfloat *out = (GLfloat *)(v); (void) a; - + DEBUG_INSERT; out[0] = in[0]; } static INLINE void insert_null( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; (void) v; (void) in; } @@ -272,6 +280,7 @@ static INLINE void insert_4chan_4f_rgba_4( const struct tnl_clipspace_attr *a, G const GLfloat *in ) { GLchan *c = (GLchan *)v; + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]); UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]); @@ -283,6 +292,7 @@ static INLINE void insert_4chan_4f_rgba_3( const struct tnl_clipspace_attr *a, G const GLfloat *in ) { GLchan *c = (GLchan *)v; + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]); UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]); @@ -294,6 +304,7 @@ static INLINE void insert_4chan_4f_rgba_2( const struct tnl_clipspace_attr *a, G const GLfloat *in ) { GLchan *c = (GLchan *)v; + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]); UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]); @@ -305,6 +316,7 @@ static INLINE void insert_4chan_4f_rgba_1( const struct tnl_clipspace_attr *a, G const GLfloat *in ) { GLchan *c = (GLchan *)v; + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]); c[1] = 0; @@ -315,6 +327,7 @@ static INLINE void insert_4chan_4f_rgba_1( const struct tnl_clipspace_attr *a, G static INLINE void insert_4ub_4f_rgba_4( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -325,6 +338,7 @@ static INLINE void insert_4ub_4f_rgba_4( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_rgba_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -335,6 +349,7 @@ static INLINE void insert_4ub_4f_rgba_3( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_rgba_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -345,6 +360,7 @@ static INLINE void insert_4ub_4f_rgba_2( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_rgba_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); v[1] = 0; @@ -355,6 +371,7 @@ static INLINE void insert_4ub_4f_rgba_1( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_bgra_4( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -365,6 +382,7 @@ static INLINE void insert_4ub_4f_bgra_4( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_bgra_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -375,6 +393,7 @@ static INLINE void insert_4ub_4f_bgra_3( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_bgra_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -385,6 +404,7 @@ static INLINE void insert_4ub_4f_bgra_2( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_bgra_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); v[1] = 0; @@ -395,6 +415,7 @@ static INLINE void insert_4ub_4f_bgra_1( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_argb_4( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -405,6 +426,7 @@ static INLINE void insert_4ub_4f_argb_4( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_argb_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -415,6 +437,7 @@ static INLINE void insert_4ub_4f_argb_3( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_argb_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -425,6 +448,7 @@ static INLINE void insert_4ub_4f_argb_2( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_argb_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); v[2] = 0x00; @@ -435,6 +459,7 @@ static INLINE void insert_4ub_4f_argb_1( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_abgr_4( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -445,6 +470,7 @@ static INLINE void insert_4ub_4f_abgr_4( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_abgr_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -455,6 +481,7 @@ static INLINE void insert_4ub_4f_abgr_3( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_abgr_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); @@ -465,6 +492,7 @@ static INLINE void insert_4ub_4f_abgr_2( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_4ub_4f_abgr_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); v[2] = 0x00; @@ -475,6 +503,7 @@ static INLINE void insert_4ub_4f_abgr_1( const struct tnl_clipspace_attr *a, GLu static INLINE void insert_3ub_3f_rgb_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -484,6 +513,7 @@ static INLINE void insert_3ub_3f_rgb_3( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_3ub_3f_rgb_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -493,6 +523,7 @@ static INLINE void insert_3ub_3f_rgb_2( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_3ub_3f_rgb_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); v[1] = 0; @@ -502,6 +533,7 @@ static INLINE void insert_3ub_3f_rgb_1( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_3ub_3f_bgr_3( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -511,6 +543,7 @@ static INLINE void insert_3ub_3f_bgr_3( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_3ub_3f_bgr_2( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); @@ -520,6 +553,7 @@ static INLINE void insert_3ub_3f_bgr_2( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_3ub_3f_bgr_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); v[1] = 0; @@ -530,6 +564,7 @@ static INLINE void insert_3ub_3f_bgr_1( const struct tnl_clipspace_attr *a, GLub static INLINE void insert_1ub_1f_1( const struct tnl_clipspace_attr *a, GLubyte *v, const GLfloat *in ) { + DEBUG_INSERT; (void) a; UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); } @@ -551,6 +586,7 @@ static void extract_4f_viewport( const struct tnl_clipspace_attr *a, GLfloat *ou /* Although included for completeness, the position coordinate is * usually handled differently during clipping. */ + DEBUG_INSERT; out[0] = (in[0] - vp[12]) / vp[0]; out[1] = (in[1] - vp[13]) / vp[5]; out[2] = (in[2] - vp[14]) / vp[10]; @@ -562,7 +598,7 @@ static void extract_3f_viewport( const struct tnl_clipspace_attr *a, GLfloat *ou { const GLfloat *in = (const GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = (in[0] - vp[12]) / vp[0]; out[1] = (in[1] - vp[13]) / vp[5]; out[2] = (in[2] - vp[14]) / vp[10]; @@ -575,7 +611,7 @@ static void extract_2f_viewport( const struct tnl_clipspace_attr *a, GLfloat *ou { const GLfloat *in = (const GLfloat *)v; const GLfloat * const vp = a->vp; - + DEBUG_INSERT; out[0] = (in[0] - vp[12]) / vp[0]; out[1] = (in[1] - vp[13]) / vp[5]; out[2] = 0; diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index 96def25206..7a255d680a 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -28,10 +28,10 @@ #include "main/glheader.h" #include "main/context.h" #include "main/colormac.h" +#include "main/simple_list.h" +#include "main/enums.h" #include "t_context.h" #include "t_vertex.h" -#include "simple_list.h" -#include "main/enums.h" #if defined(USE_SSE_ASM) @@ -39,6 +39,12 @@ #include "x86/common_x86_asm.h" +/** + * Number of bytes to allocate for generated SSE functions + */ +#define MAX_SSE_CODE_SIZE 1024 + + #define X 0 #define Y 1 #define Z 2 @@ -140,7 +146,8 @@ static void emit_load3f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load2f_2( struct x86_program *p, @@ -154,7 +161,8 @@ static void emit_load2f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load1f_1( struct x86_program *p, @@ -346,6 +354,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) struct x86_reg temp = x86_make_reg(file_XMM, 0); struct x86_reg vp0 = x86_make_reg(file_XMM, 1); struct x86_reg vp1 = x86_make_reg(file_XMM, 2); + struct x86_reg temp2 = x86_make_reg(file_XMM, 3); GLubyte *fixup, *label; /* Push a few regs? @@ -518,7 +527,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); get_src_ptr(p, srcECX, vtxESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + emit_load(p, temp2, 1, x86_deref(srcECX), a[1].inputsize); + sse_movss(&p->func, temp, temp2); update_src_ptr(p, srcECX, vtxESI, &a[1]); /* Rearrange and possibly do BGR conversion: @@ -533,8 +543,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) } else { _mesa_printf("Can't emit 3ub\n"); + return GL_FALSE; /* add this later */ } - return GL_FALSE; /* add this later */ break; case EMIT_4UB_4F_RGBA: @@ -619,7 +629,10 @@ static GLboolean build_vertex_emit( struct x86_program *p ) x86_pop(&p->func, countEBP); x86_ret(&p->func); + assert(!vtx->emit); vtx->emit = (tnl_emit_func)x86_get_func(&p->func); + + assert( (char *) p->func.csr - (char *) p->func.store <= MAX_SSE_CODE_SIZE ); return GL_TRUE; } @@ -644,7 +657,10 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); - x86_init_func(&p.func); + if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) { + vtx->emit = NULL; + return; + } if (build_vertex_emit(&p)) { _tnl_register_fastpath( vtx, GL_TRUE ); diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c index b3b63cd3e4..7be4d95af6 100644 --- a/src/mesa/tnl/t_vp_build.c +++ b/src/mesa/tnl/t_vp_build.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.1 * - * Copyright (C) 2006 Tungsten Graphics All Rights Reserved. + * Copyright (C) 2007 Tungsten Graphics All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h index 4bdbed92df..4d628aa9a6 100644 --- a/src/mesa/tnl/tnl.h +++ b/src/mesa/tnl/tnl.h @@ -1,9 +1,8 @@ - /* * Mesa 3-D graphics library - * Version: 3.5 + * Version: 7.1 * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), |