summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGareth Hughes <gareth@valinux.com>2000-10-23 00:16:28 +0000
committerGareth Hughes <gareth@valinux.com>2000-10-23 00:16:28 +0000
commitfc2427e81b1c648550d0368652d6a475df785027 (patch)
tree92da699f07610ef4dc07ae4e0add4693c192040d /src
parente188b6e1f1d1e5e72985fdc3d69d26eeab26e987 (diff)
Major audit of all Mesa's x86 assembly code. This round is basically
general cleanups - more to come. Added P6 architecture timing to debug_xform routines. Still need to add test_all_vertex_functions test for the v16 asm. Dynamic reconfiguration of counter overhead for more accurate benchmarking.
Diffstat (limited to 'src')
-rw-r--r--src/mesa/main/blend.c8
-rw-r--r--src/mesa/main/context.c7
-rw-r--r--src/mesa/x86/3dnow.c165
-rw-r--r--src/mesa/x86/3dnow.h75
-rw-r--r--src/mesa/x86/assyntax.h2
-rw-r--r--src/mesa/x86/clip_args.h76
-rw-r--r--src/mesa/x86/common_x86.c109
-rw-r--r--src/mesa/x86/common_x86_asm.S152
-rw-r--r--src/mesa/x86/common_x86_asm.h63
-rw-r--r--src/mesa/x86/common_x86_features.h77
-rw-r--r--src/mesa/x86/mmx.h11
-rw-r--r--src/mesa/x86/mmx_blend.S4
-rw-r--r--src/mesa/x86/x86.c150
-rw-r--r--src/mesa/x86/x86.h20
-rw-r--r--src/mesa/x86/x86_cliptest.S248
-rw-r--r--src/mesa/x86/xform_args.h74
16 files changed, 948 insertions, 293 deletions
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index c4e8e86bba..8ea3297b7d 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -1,4 +1,4 @@
-/* $Id: blend.c,v 1.19 2000/10/19 18:08:05 brianp Exp $ */
+/* $Id: blend.c,v 1.20 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@@ -826,7 +826,7 @@ blend_general( GLcontext *ctx, GLuint n, const GLubyte mask[],
#if defined(USE_MMX_ASM)
#include "X86/mmx.h"
-#include "X86/common_x86asm.h"
+#include "X86/common_x86_asm.h"
#endif
@@ -846,8 +846,8 @@ static void set_blend_function( GLcontext *ctx )
/* Hmm. A table here would have 12^4 == way too many entries.
* Provide a hook for MMX instead.
*/
- if (gl_x86_cpu_features & GL_CPU_MMX) {
- gl_mmx_set_blend_function (ctx);
+ if ( cpu_has_mmx ) {
+ gl_mmx_set_blend_function( ctx );
}
else
#endif
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index c57f37c579..8a5bf559e8 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1,4 +1,4 @@
-/* $Id: context.c,v 1.94 2000/10/21 00:02:47 brianp Exp $ */
+/* $Id: context.c,v 1.95 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@@ -454,6 +454,7 @@ one_time_init( void )
gl_init_translate();
gl_init_vbrender();
gl_init_vbxform();
+ gl_init_vertices();
if (getenv("MESA_DEBUG")) {
_glapi_noop_enable_warnings(GL_TRUE);
@@ -884,7 +885,7 @@ init_attrib_groups( GLcontext *ctx )
ctx->Current.Primitive = (GLenum) (GL_POLYGON + 1);
ctx->Current.Flag = (VERT_NORM |
- VERT_INDEX |
+ VERT_INDEX |
VERT_RGBA |
VERT_EDGE |
VERT_TEX0_1 |
@@ -2114,4 +2115,4 @@ GLenum gl_reduce_prim[GL_POLYGON+1] = {
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
-};
+};
diff --git a/src/mesa/x86/3dnow.c b/src/mesa/x86/3dnow.c
index 5dc3b38fa1..3becc8cc58 100644
--- a/src/mesa/x86/3dnow.c
+++ b/src/mesa/x86/3dnow.c
@@ -1,4 +1,4 @@
-/* $Id: 3dnow.c,v 1.7 2000/09/17 21:12:40 gareth Exp $ */
+/* $Id: 3dnow.c,v 1.8 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
@@ -24,88 +24,75 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
-#if defined(USE_3DNOW_ASM) && defined(USE_X86_ASM)
-#include "3dnow.h"
-
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
+#include "glheader.h"
#include "context.h"
#include "types.h"
-#include "xform.h"
#include "vertices.h"
+#include "xform.h"
+#include "3dnow.h"
#ifdef DEBUG
#include "debug_xform.h"
#endif
-
-
-#define XFORM_ARGS GLvector4f *to_vec, \
- const GLfloat m[16], \
- const GLvector4f *from_vec, \
- const GLubyte *mask, \
+#define XFORM_ARGS GLvector4f *to_vec, \
+ const GLfloat m[16], \
+ const GLvector4f *from_vec, \
+ const GLubyte *mask, \
const GLubyte flag
+#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
-#define DECLARE_XFORM_GROUP( pfx, v, masked ) \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_general_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_identity_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_no_rot_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_perspective_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_2d_no_rot_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##v##_3d_##masked(XFORM_ARGS);
+#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
+ gl_transform_tab[cma][sz][MATRIX_GENERAL] = \
+ gl_##pfx##_transform_points##sz##_general_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \
+ gl_##pfx##_transform_points##sz##_identity_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
+ gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
+ gl_##pfx##_transform_points##sz##_perspective_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_2D] = \
+ gl_##pfx##_transform_points##sz##_2d_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
+ gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_3D] = \
+ gl_##pfx##_transform_points##sz##_3d_##masked;
-#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \
- gl_transform_tab[cma][vsize][MATRIX_GENERAL] = \
- gl_##pfx##_transform_points##vsize##_general_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_IDENTITY] = \
- gl_##pfx##_transform_points##vsize##_identity_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] = \
- gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] = \
- gl_##pfx##_transform_points##vsize##_perspective_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_2D] = \
- gl_##pfx##_transform_points##vsize##_2d_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] = \
- gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_3D] = \
- gl_##pfx##_transform_points##vsize##_3d_##masked;
-
-
-
-#define NORM_ARGS const GLmatrix *mat, \
- GLfloat scale, \
- const GLvector3f *in, \
- const GLfloat *lengths, \
- const GLubyte mask[], \
+#define NORM_ARGS const GLmatrix *mat, \
+ GLfloat scale, \
+ const GLvector3f *in, \
+ const GLfloat *lengths, \
+ const GLubyte mask[], \
GLvector3f *dest
-
#define DECLARE_NORM_GROUP( pfx, masked ) \
- extern void _ASMAPI gl_##pfx##_rescale_normals_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_normalize_normals_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_normals_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked(NORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked(NORM_ARGS);
-
+ extern void _ASMAPI gl_##pfx##_rescale_normals_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_normalize_normals_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_normals_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS );
#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \
@@ -127,24 +114,7 @@
gl_##pfx##_transform_normalize_normals_no_rot_##masked;
-extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first,
- GLfloat *last,
- const GLfloat *m,
- GLuint stride );
-
-extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first,
- GLfloat *last,
- const GLfloat *m,
- GLuint stride,
- const GLubyte *clipmask );
-
-extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert,
- const GLfloat *m,
- const GLfloat *src,
- GLuint src_stride,
- GLuint count );
-
-
+#ifdef USE_3DNOW_ASM
DECLARE_XFORM_GROUP( 3dnow, 1, raw )
DECLARE_XFORM_GROUP( 3dnow, 2, raw )
DECLARE_XFORM_GROUP( 3dnow, 3, raw )
@@ -159,8 +129,28 @@ DECLARE_NORM_GROUP( 3dnow, raw )
/*DECLARE_NORM_GROUP( 3dnow, masked )*/
-void gl_init_3dnow_asm_transforms( void )
+extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert,
+ const GLfloat *m,
+ const GLfloat *src,
+ GLuint src_stride,
+ GLuint count );
+
+extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first,
+ GLfloat *last,
+ const GLfloat *m,
+ GLuint stride );
+
+extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first,
+ GLfloat *last,
+ const GLfloat *m,
+ GLuint stride,
+ const GLubyte *clipmask );
+#endif
+
+
+void gl_init_3dnow_transform_asm( void )
{
+#ifdef USE_3DNOW_ASM
ASSIGN_XFORM_GROUP( 3dnow, 0, 1, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw );
@@ -178,21 +168,18 @@ void gl_init_3dnow_asm_transforms( void )
gl_test_all_transform_functions( "3DNow!" );
gl_test_all_normal_transform_functions( "3DNow!" );
#endif
-
- /* Hook in some stuff for vertices.c.
- */
- gl_xform_points3_v16_general = gl_v16_3dnow_general_xform;
- gl_project_v16 = gl_3dnow_project_vertices;
- gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices;
+#endif
}
-#else
-
-/* silence compiler warning */
-extern void _mesa_3dnow_dummy_function( void );
-
-void _mesa_3dnow_dummy_function( void )
+void gl_init_3dnow_vertex_asm( void )
{
-}
+#ifdef USE_3DNOW_ASM
+ gl_xform_points3_v16_general = gl_v16_3dnow_general_xform;
+ gl_project_v16 = gl_3dnow_project_vertices;
+ gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices;
+#if 0
+ gl_test_all_vertex_functions( "3DNow!" );
+#endif
#endif
+}
diff --git a/src/mesa/x86/3dnow.h b/src/mesa/x86/3dnow.h
index b06cc5b288..1e17cc4bb4 100644
--- a/src/mesa/x86/3dnow.h
+++ b/src/mesa/x86/3dnow.h
@@ -1,21 +1,21 @@
-/* $Id: 3dnow.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */
+/* $Id: 3dnow.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
- * Version: 3.1
- *
+ * Version: 3.5
+ *
* Copyright (C) 1999 Brian Paul All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -24,72 +24,17 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
-
-#ifndef _3dnow_h
-#define _3dnow_h
-
-
+#ifndef __3DNOW_H__
+#define __3DNOW_H__
#include "xform.h"
-
-void gl_init_3dnow_asm_transforms (void);
-
-
-
-
-#if 0
-GLvector4f *gl_project_points( GLvector4f *proj_vec,
- const GLvector4f *clip_vec )
-{
- __asm__ (
- " femms \n"
- " \n"
- " movq (%0), %%mm0 # x1 | x0 \n"
- " movq 8(%0), %%mm1 # oow | x2 \n"
- " \n"
- "1: movq %%mm1, %%mm2 # oow | x2 \n"
- " addl %2, %0 # next point \n"
- " \n"
- " punpckhdq %%mm2, %%mm2 # oow | oow \n"
- " addl $16, %1 # next point \n"
- " \n"
- " pfrcp %%mm2, %%mm3 # 1/oow | 1/oow \n"
- " decl %3 \n"
- " \n"
- " pfmul %%mm3, %%mm0 # x1/oow | x0/oow \n"
- " movq %%mm0, -16(%1) # write r0, r1 \n"
- " \n"
- " pfmul %%mm3, %%mm1 # 1 | x2/oow \n"
- " movq (%0), %%mm0 # x1 | x0 \n"
- " \n"
- " movd %%mm1, 8(%1) # write r2 \n"
- " movd %%mm3, 12(%1) # write r3 \n"
- " \n"
- " movq 8(%0), %%mm1 # oow | x2 \n"
- " ja 1b \n"
- " \n"
- " femms \n"
- " "
- ::"a" (clip_vec->start),
- "c" (proj_vec->start),
- "g" (clip_vec->stride),
- "d" (clip_vec->count)
- );
-
- proj_vec->flags |= VEC_SIZE_4;
- proj_vec->size = 3;
- proj_vec->count = clip_vec->count;
- return proj_vec;
-}
-#endif
-
-
+void gl_init_3dnow_transform_asm( void );
+void gl_init_3dnow_vertex_asm( void );
#endif
diff --git a/src/mesa/x86/assyntax.h b/src/mesa/x86/assyntax.h
index 34bd11f005..b9bb1f7ea0 100644
--- a/src/mesa/x86/assyntax.h
+++ b/src/mesa/x86/assyntax.h
@@ -1,4 +1,4 @@
-/* $Id: assyntax.h,v 1.15 2000/09/18 22:49:04 gareth Exp $ */
+/* $Id: assyntax.h,v 1.16 2000/10/23 00:16:28 gareth Exp $ */
#ifndef __ASSYNTAX_H__
#define __ASSYNTAX_H__
diff --git a/src/mesa/x86/clip_args.h b/src/mesa/x86/clip_args.h
new file mode 100644
index 0000000000..0829ec7fdc
--- /dev/null
+++ b/src/mesa/x86/clip_args.h
@@ -0,0 +1,76 @@
+/* $Id: clip_args.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Clip test function interface for assembly code. Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __CLIP_ARGS_H__
+#define __CLIP_ARGS_H__
+
+/* Offsets into GLvector4f
+ */
+#define V4F_DATA 0
+#define V4F_START 4
+#define V4F_COUNT 8
+#define V4F_STRIDE 12
+#define V4F_SIZE 16
+#define V4F_FLAGS 20
+
+/* GLvector4f flags
+ */
+#define VEC_SIZE_1 1
+#define VEC_SIZE_2 3
+#define VEC_SIZE_3 7
+#define VEC_SIZE_4 15
+
+/*
+ * Offsets for clip_func arguments
+ *
+ * typedef GLvector4f *(*clip_func)( GLvector4f *vClip,
+ * GLvector4f *vProj,
+ * GLubyte clipMask[],
+ * GLubyte *orMask,
+ * GLubyte *andMask );
+ */
+
+#define OFFSET_SOURCE 4
+#define OFFSET_DEST 8
+#define OFFSET_CLIP 12
+#define OFFSET_OR 16
+#define OFFSET_AND 20
+
+#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
+#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
+#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
+
+#endif
diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c
index e779fe193b..93d21161a9 100644
--- a/src/mesa/x86/common_x86.c
+++ b/src/mesa/x86/common_x86.c
@@ -1,21 +1,21 @@
-/* $Id: common_x86.c,v 1.6 2000/01/25 17:04:47 brianp Exp $ */
+/* $Id: common_x86.c,v 1.7 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
- * Version: 3.3
- *
+ * Version: 3.5
+ *
* Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -26,81 +26,102 @@
/*
- * Check CPU capabilities & initialize optimized funtions for this particular
- * processor.
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
*
- * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
- * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
- * new Katmai functions
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
+ * new Katmai functions.
*/
#include <stdlib.h>
#include <stdio.h>
-#include "common_x86asm.h"
+
+#include "common_x86_asm.h"
+
int gl_x86_cpu_features = 0;
-static void message(const char *msg)
+/* No reason for this to be public.
+ */
+extern int gl_identify_x86_cpu_features( void );
+
+
+static void message( const char *msg )
{
- if (getenv("MESA_DEBUG"))
- fprintf(stderr, "%s\n", msg);
+ if ( getenv( "MESA_DEBUG" ) ) {
+ fprintf( stderr, "%s\n", msg );
+ }
}
-void gl_init_all_x86_asm (void)
+void gl_init_all_x86_transform_asm( void )
{
#ifdef USE_X86_ASM
- gl_x86_cpu_features = gl_identify_x86_cpu_features ();
- gl_x86_cpu_features |= GL_CPU_AnyX86;
+ gl_x86_cpu_features = gl_identify_x86_cpu_features();
- if (getenv("MESA_NO_ASM") != 0)
+ if ( getenv( "MESA_NO_ASM" ) ) {
gl_x86_cpu_features = 0;
-
- if (gl_x86_cpu_features & GL_CPU_GenuineIntel) {
- message("GenuineIntel cpu detected.");
}
- if (gl_x86_cpu_features) {
- gl_init_x86_asm_transforms ();
+ if ( gl_x86_cpu_features ) {
+ gl_init_x86_transform_asm();
}
#ifdef USE_MMX_ASM
- if (gl_x86_cpu_features & GL_CPU_MMX) {
- char *s = getenv( "MESA_NO_MMX" );
- if (s == NULL) {
- message("MMX cpu detected.");
+ if ( cpu_has_mmx ) {
+ if ( getenv( "MESA_NO_MMX" ) == 0 ) {
+ message( "MMX cpu detected." );
} else {
- gl_x86_cpu_features &= (~GL_CPU_MMX);
+ gl_x86_cpu_features &= ~(X86_FEATURE_MMX);
}
}
#endif
-
#ifdef USE_3DNOW_ASM
- if (gl_x86_cpu_features & GL_CPU_3Dnow) {
- char *s = getenv( "MESA_NO_3DNOW" );
- if (s == NULL) {
- message("3Dnow cpu detected.");
- gl_init_3dnow_asm_transforms ();
+ if ( cpu_has_3dnow ) {
+ if ( getenv( "MESA_NO_3DNOW" ) == 0 ) {
+ message( "3Dnow cpu detected." );
+ gl_init_3dnow_transform_asm();
} else {
- gl_x86_cpu_features &= (~GL_CPU_3Dnow);
+ gl_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
}
}
#endif
-
#ifdef USE_KATMAI_ASM
- if (gl_x86_cpu_features & GL_CPU_Katmai) {
- char *s = getenv( "MESA_NO_KATMAI" );
- if (s == NULL) {
- message("Katmai cpu detected.");
- gl_init_katmai_asm_transforms ();
+ if ( cpu_has_xmm ) {
+ if ( getenv( "MESA_NO_KATMAI" ) == 0 ) {
+ message( "Katmai cpu detected." );
+ gl_init_katmai_transform_asm();
} else {
- gl_x86_cpu_features &= (~GL_CPU_Katmai);
+ gl_x86_cpu_features &= ~(X86_FEATURE_XMM);
}
}
#endif
-
#endif
}
+/* Install the assembly-optimized vertex routines selected by the feature
+ * bits in gl_x86_cpu_features.
+ *
+ * Note: gl_init_all_x86_transform_asm() must be called before this one,
+ * so that gl_x86_cpu_features gets correctly initialized.
+ *
+ * The generic x86 vertex code is hooked in whenever any feature bit is
+ * set.  The 3DNow! and Katmai variants are hooked in only when the
+ * matching feature bit is set and the corresponding MESA_NO_3DNOW /
+ * MESA_NO_KATMAI environment variable is not defined.  When Mesa is
+ * built without USE_X86_ASM the body is empty.
+ */
+void gl_init_all_x86_vertex_asm( void )
+{
+#ifdef USE_X86_ASM
+ if ( gl_x86_cpu_features ) { /* zero means no usable CPU features */
+ gl_init_x86_vertex_asm();
+ }
+
+#ifdef USE_3DNOW_ASM
+ if ( cpu_has_3dnow && getenv( "MESA_NO_3DNOW" ) == 0 ) {
+ gl_init_3dnow_vertex_asm();
+ }
+#endif /* USE_3DNOW_ASM */
+
+#ifdef USE_KATMAI_ASM
+ if ( cpu_has_xmm && getenv( "MESA_NO_KATMAI" ) == 0 ) {
+ gl_init_katmai_vertex_asm();
+ }
+#endif /* USE_KATMAI_ASM */
+#endif /* USE_X86_ASM */
+}
diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S
new file mode 100644
index 0000000000..675711e44a
--- /dev/null
+++ b/src/mesa/x86/common_x86_asm.S
@@ -0,0 +1,152 @@
+/* $Id: common_x86_asm.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Check extended CPU capabilities. Now just returns the raw CPUID
+ * feature information, allowing the higher level code to interpret the
+ * results.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ *
+ * Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
+ */
+
+#include "assyntax.h"
+#include "common_x86_features.h"
+
+
+/* Intel vendor string
+ */
+#define GENU 0x756e6547 /* "Genu" */
+#define INEI 0x49656e69 /* "ineI" */
+#define NTEL 0x6c65746e /* "ntel" */
+
+/* AMD vendor string
+ */
+#define AUTH 0x68747541 /* "Auth" */
+#define ENTI 0x69746e65 /* "enti" */
+#define CAMD 0x444d4163 /* "cAMD" */
+
+
+ SEG_DATA
+
+/* We might want to print out some useful messages.
+ */
+LLBL( found_intel ): STRING( "Genuine Intel processor found\n\0" )
+LLBL( found_amd ): STRING( "Authentic AMD processor found\n\0" )
+
+
+ SEG_TEXT
+
+/* int gl_identify_x86_cpu_features( void )
+ *
+ * Returns the raw CPUID feature flags (EDX) in EAX: standard function 1
+ * on Intel parts, extended function 0x80000001 on AMD parts. Returns
+ * zero when the CPUID instruction is unavailable, when the vendor is
+ * not recognised, or when an AMD part lacks the extended functions.
+ * EBX is preserved; ECX and EDX are clobbered.
+ */
+ALIGNTEXT4
+GLOBL GLNAME( gl_identify_x86_cpu_features )
+GLNAME( gl_identify_x86_cpu_features ):
+
+ PUSH_L ( EBX )
+
+ /* Test for the CPUID command. If the ID Flag bit in EFLAGS
+ * (bit 21) is writable, the CPUID command is present.
+ */
+ PUSHF_L
+ POP_L ( EAX )
+ MOV_L ( EAX, ECX )
+ XOR_L ( CONST(0x00200000), EAX )
+ PUSH_L ( EAX )
+ POPF_L
+ PUSHF_L
+ POP_L ( EAX )
+
+ /* Verify the ID Flag bit has been written. If it did not
+ * toggle there is no CPUID; EAX still holds the EFLAGS image
+ * here, so go through cpuid_failed to return zero rather than
+ * leaking that value to the caller as a feature word.
+ */
+ CMP_L ( ECX, EAX )
+ JZ ( LLBL ( cpuid_failed ) )
+
+ /* Get the CPU vendor info.
+ */
+ XOR_L ( EAX, EAX )
+ CPUID
+
+ /* Test for Intel processors. We must look for the
+ * "GenuineIntel" string in EBX, ECX and EDX.
+ */
+ CMP_L ( CONST(GENU), EBX )
+ JNE ( LLBL( cpuid_amd ) )
+ CMP_L ( CONST(INEI), EDX )
+ JNE ( LLBL( cpuid_amd ) )
+ CMP_L ( CONST(NTEL), ECX )
+ JNE ( LLBL( cpuid_amd ) )
+
+ /* We have an Intel processor, so we can get the feature
+ * information with a CPUID input value of 1.
+ *
+ * NOTE(review): cpu_has_3dnow tests bit 31 of whatever this
+ * routine returns, but bit 31 of the standard function 1 flags
+ * is not a 3DNow! indicator on Intel parts -- confirm that
+ * 3DNow! cannot be misdetected on this path.
+ */
+ MOV_L ( CONST(0x1), EAX )
+ CPUID
+ MOV_L ( EDX, EAX )
+ JMP ( LLBL( cpuid_done ) )
+
+LLBL( cpuid_amd ):
+
+ /* Test for AMD processors. We must look for the
+ * "AuthenticAMD" string in EBX, ECX and EDX.
+ */
+ CMP_L ( CONST(AUTH), EBX )
+ JNE ( LLBL( cpuid_other ) )
+ CMP_L ( CONST(ENTI), EDX )
+ JNE ( LLBL( cpuid_other ) )
+ CMP_L ( CONST(CAMD), ECX )
+ JNE ( LLBL( cpuid_other ) )
+
+ /* We have an AMD processor, so we can get the feature
+ * information after we verify that the extended functions are
+ * supported.
+ */
+ MOV_L ( CONST(0x80000000), EAX )
+ CPUID
+ TEST_L ( EAX, EAX )
+ JZ ( LLBL ( cpuid_failed ) )
+
+ MOV_L ( CONST(0x80000001), EAX )
+ CPUID
+ MOV_L ( EDX, EAX )
+ JMP ( LLBL ( cpuid_done ) )
+
+LLBL( cpuid_other ):
+
+ /* Test for other processors here when required.
+ */
+
+LLBL( cpuid_failed ):
+
+ /* If we can't determine the feature information, we must
+ * return zero to indicate that no platform-specific
+ * optimizations can be used.
+ */
+ MOV_L ( CONST(0), EAX )
+
+LLBL ( cpuid_done ):
+
+ POP_L ( EBX )
+ RET
diff --git a/src/mesa/x86/common_x86_asm.h b/src/mesa/x86/common_x86_asm.h
new file mode 100644
index 0000000000..880be22e8f
--- /dev/null
+++ b/src/mesa/x86/common_x86_asm.h
@@ -0,0 +1,63 @@
+/* $Id: common_x86_asm.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
+ * new Katmai functions
+ *
+ * Reimplemented by Gareth Hughes <gareth@valinux.com> in a more
+ * future-proof manner, based on code in the Linux kernel.
+ */
+
+#ifndef __COMMON_X86_ASM_H__
+#define __COMMON_X86_ASM_H__
+
+#include "common_x86_features.h"
+
+#ifdef HAVE_CONFIG_H
+#include "conf.h"
+#endif
+
+#ifdef USE_X86_ASM
+#include "x86.h"
+#ifdef USE_3DNOW_ASM
+#include "3dnow.h"
+#endif
+#ifdef USE_KATMAI_ASM
+#include "katmai.h"
+#endif
+#endif
+
+extern int gl_x86_cpu_features;
+
+extern void gl_init_all_x86_transform_asm( void );
+extern void gl_init_all_x86_vertex_asm( void );
+
+#endif
diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h
new file mode 100644
index 0000000000..2f575c8bfe
--- /dev/null
+++ b/src/mesa/x86/common_x86_features.h
@@ -0,0 +1,77 @@
+/* $Id: common_x86_features.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * x86 CPUID feature information. The raw data is returned by
+ * gl_identify_x86_cpu_features() and interpreted with the cpu_has_*
+ * helper macros.
+ *
+ * Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __COMMON_X86_FEATURES_H__
+#define __COMMON_X86_FEATURES_H__
+
+/* Capabilities of CPUs
+ */
+#define X86_FEATURE_FPU 0x00000001
+#define X86_FEATURE_VME 0x00000002
+#define X86_FEATURE_DE 0x00000004
+#define X86_FEATURE_PSE 0x00000008
+#define X86_FEATURE_TSC 0x00000010
+#define X86_FEATURE_MSR 0x00000020
+#define X86_FEATURE_PAE 0x00000040
+#define X86_FEATURE_MCE 0x00000080
+#define X86_FEATURE_CX8 0x00000100
+#define X86_FEATURE_APIC 0x00000200
+#define X86_FEATURE_10 0x00000400
+#define X86_FEATURE_SEP 0x00000800
+#define X86_FEATURE_MTRR 0x00001000
+#define X86_FEATURE_PGE 0x00002000
+#define X86_FEATURE_MCA 0x00004000
+#define X86_FEATURE_CMOV 0x00008000
+#define X86_FEATURE_PAT 0x00010000
+#define X86_FEATURE_PSE36 0x00020000
+#define X86_FEATURE_18 0x00040000
+#define X86_FEATURE_19 0x00080000
+#define X86_FEATURE_20 0x00100000
+#define X86_FEATURE_21 0x00200000
+#define X86_FEATURE_MMXEXT 0x00400000
+#define X86_FEATURE_MMX 0x00800000
+#define X86_FEATURE_FXSR 0x01000000
+#define X86_FEATURE_XMM 0x02000000
+#define X86_FEATURE_26 0x04000000
+#define X86_FEATURE_27 0x08000000
+#define X86_FEATURE_28 0x10000000
+#define X86_FEATURE_29 0x20000000
+#define X86_FEATURE_3DNOWEXT 0x40000000
+#define X86_FEATURE_3DNOW 0x80000000
+
+#define cpu_has_mmx (gl_x86_cpu_features & X86_FEATURE_MMX)
+#define cpu_has_xmm (gl_x86_cpu_features & X86_FEATURE_XMM)
+#define cpu_has_3dnow (gl_x86_cpu_features & X86_FEATURE_3DNOW)
+
+#endif
diff --git a/src/mesa/x86/mmx.h b/src/mesa/x86/mmx.h
index f0e05cf06d..0fad398803 100644
--- a/src/mesa/x86/mmx.h
+++ b/src/mesa/x86/mmx.h
@@ -1,20 +1,21 @@
+/* $Id: mmx.h,v 1.3 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
- * Version: 3.1
- *
+ * Version: 3.5
+ *
* Copyright (C) 1999 Brian Paul All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
diff --git a/src/mesa/x86/mmx_blend.S b/src/mesa/x86/mmx_blend.S
index e7d6e118c8..21fa36ede6 100644
--- a/src/mesa/x86/mmx_blend.S
+++ b/src/mesa/x86/mmx_blend.S
@@ -350,7 +350,3 @@ LLBL(GMBT_1):
MOV_L ( EBP, ESP )
POP_L ( EBP )
RET
-
-
-
-
diff --git a/src/mesa/x86/x86.c b/src/mesa/x86/x86.c
index 2db200df05..fcd097867c 100644
--- a/src/mesa/x86/x86.c
+++ b/src/mesa/x86/x86.c
@@ -1,21 +1,21 @@
-/* $Id: x86.c,v 1.8 2000/06/27 22:10:01 brianp Exp $ */
+/* $Id: x86.c,v 1.9 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
- * Version: 3.1
- *
+ * Version: 3.5
+ *
* Copyright (C) 1999 Brian Paul All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -28,7 +28,6 @@
* Intel x86 assembly code by Josh Vanderhoof
*/
-
#include "glheader.h"
#include "context.h"
#include "types.h"
@@ -36,89 +35,104 @@
#include "xform.h"
#include "x86.h"
-#ifdef USE_X86_ASM
-extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert,
- GLfloat *last_vert,
- GLubyte *or_mask,
- GLubyte *and_mask,
- GLubyte *clip_mask );
-
-
-extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest,
- const GLfloat *m,
- const GLfloat *src,
- GLuint src_stride,
- GLuint count );
+#ifdef DEBUG
+#include "debug_xform.h"
#endif
-#define XFORM_ARGS GLvector4f *to_vec, \
- const GLfloat m[16], \
- const GLvector4f *from_vec, \
- const GLubyte *mask, \
+#define XFORM_ARGS GLvector4f *to_vec, /* parameter list pasted into every prototype generated by DECLARE_XFORM_GROUP */ \
+ const GLfloat m[16], \
+ const GLvector4f *from_vec, \
+ const GLubyte *mask, \
const GLubyte flag
-#define DECLARE_XFORM_GROUP(pfx, vsize, masked) \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_general_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_identity_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_perspective_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked(XFORM_ARGS); \
- extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_##masked(XFORM_ARGS);
-#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \
- gl_transform_tab[cma][vsize][MATRIX_GENERAL] \
- = gl_##pfx##_transform_points##vsize##_general_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \
- = gl_##pfx##_transform_points##vsize##_identity_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \
- = gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \
- = gl_##pfx##_transform_points##vsize##_perspective_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_2D] \
- = gl_##pfx##_transform_points##vsize##_2d_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \
- = gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \
- gl_transform_tab[cma][vsize][MATRIX_3D] \
- = gl_##pfx##_transform_points##vsize##_3d_##masked;
+#define DECLARE_XFORM_GROUP( pfx, sz, masked ) /* declares the seven gl_<pfx>_transform_points<sz>_<matrix type>_<masked> externs, each taking XFORM_ARGS */ \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
+ extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
+
+
+#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
+ gl_transform_tab[cma][sz][MATRIX_GENERAL] = \
+ gl_##pfx##_transform_points##sz##_general_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \
+ gl_##pfx##_transform_points##sz##_identity_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
+ gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
+ gl_##pfx##_transform_points##sz##_perspective_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_2D] = \
+ gl_##pfx##_transform_points##sz##_2d_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
+ gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
+ gl_transform_tab[cma][sz][MATRIX_3D] = \
+ gl_##pfx##_transform_points##sz##_3d_##masked;
#ifdef USE_X86_ASM
- DECLARE_XFORM_GROUP( x86, 2, raw )
- DECLARE_XFORM_GROUP( x86, 3, raw )
- DECLARE_XFORM_GROUP( x86, 4, raw )
- DECLARE_XFORM_GROUP( x86, 2, masked )
- DECLARE_XFORM_GROUP( x86, 3, masked )
- DECLARE_XFORM_GROUP( x86, 4, masked )
+DECLARE_XFORM_GROUP( x86, 2, raw ) /* externs for the "raw" x86 transforms, sz = 2, 3, 4 */
+DECLARE_XFORM_GROUP( x86, 3, raw )
+DECLARE_XFORM_GROUP( x86, 4, raw )
+DECLARE_XFORM_GROUP( x86, 2, masked ) /* externs for the "masked" x86 transforms, sz = 2, 3, 4 */
+DECLARE_XFORM_GROUP( x86, 3, masked )
+DECLARE_XFORM_GROUP( x86, 4, masked )
+
+
+extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec,
+ GLvector4f *proj_vec,
+ GLubyte clipMask[],
+ GLubyte *orMask,
+ GLubyte *andMask ); /* implemented in x86_cliptest.S; returns proj_vec */
+
- extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec,
- GLvector4f *proj_vec,
- GLubyte clipMask[],
- GLubyte *orMask,
- GLubyte *andMask );
+extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert,
+ GLfloat *last_vert,
+ GLubyte *or_mask,
+ GLubyte *and_mask,
+ GLubyte *clip_mask ); /* v16 clip test; installed by gl_init_x86_vertex_asm() */
+
+
+extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest,
+ const GLfloat *m,
+ const GLfloat *src,
+ GLuint src_stride,
+ GLuint count ); /* v16 general transform; installed by gl_init_x86_vertex_asm() */
#endif
-void gl_init_x86_asm_transforms( void )
+void gl_init_x86_transform_asm( void ) /* hooks the x86 asm transform and clip-test routines into the dispatch tables; empty unless USE_X86_ASM is defined */
{
#ifdef USE_X86_ASM
- ASSIGN_XFORM_GROUP( x86, 0, 2, raw )
- ASSIGN_XFORM_GROUP( x86, 0, 3, raw )
- ASSIGN_XFORM_GROUP( x86, 0, 4, raw )
+ ASSIGN_XFORM_GROUP( x86, 0, 2, raw ); /* table index 0: "raw" variants */
+ ASSIGN_XFORM_GROUP( x86, 0, 3, raw );
+ ASSIGN_XFORM_GROUP( x86, 0, 4, raw );
- ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked )
- ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked )
- ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked )
+ ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ); /* table index CULL_MASK_ACTIVE: "masked" variants */
+ ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked );
+ ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked );
/* XXX this function has been found to cause FP overflow exceptions.
 * NOTE(review): presumably because x86_cliptest.S issues 1/w for every
 * vertex before its clip code is known -- confirm.
 */
gl_clip_tab[4] = gl_x86_cliptest_points4;
#ifdef DEBUG
- gl_test_all_transform_functions("x86");
+ gl_test_all_transform_functions( "x86" ); /* DEBUG builds only */
+#endif
#endif
+}
- gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4;
- gl_xform_points3_v16_general = gl_v16_x86_general_xform;
+void gl_init_x86_vertex_asm( void ) /* points the two v16 hooks at their x86 asm implementations; empty unless USE_X86_ASM is defined */
+{
+#ifdef USE_X86_ASM
+ gl_xform_points3_v16_general = gl_v16_x86_general_xform;
+ gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4;
+
+#if 0 /* disabled: the commit message says the test_all_vertex_functions test for the v16 asm still needs to be added */
+ gl_test_all_vertex_functions( "x86" );
+#endif
#endif
}
diff --git a/src/mesa/x86/x86.h b/src/mesa/x86/x86.h
index c7aca91c9f..88afd18737 100644
--- a/src/mesa/x86/x86.h
+++ b/src/mesa/x86/x86.h
@@ -1,21 +1,21 @@
-/* $Id: x86.h,v 1.1 1999/08/19 00:55:42 jtg Exp $ */
+/* $Id: x86.h,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
/*
* Mesa 3-D graphics library
- * Version: 3.1
- *
+ * Version: 3.5
+ *
* Copyright (C) 1999 Brian Paul All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
- *
+ *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -28,10 +28,10 @@
* Intel x86 assembly code by Josh Vanderhoof
*/
+#ifndef __X86_H__
+#define __X86_H__ /* NOTE(review): identifiers containing "__" are reserved for the implementation; the previous guard name X86_H was not */
-#ifndef X86_H
-#define X86_H
-
-extern void gl_init_x86_asm_transforms(void);
+extern void gl_init_x86_transform_asm( void ); /* installs the transform-table and clip-test entries (x86.c) */
+extern void gl_init_x86_vertex_asm( void ); /* installs the v16 transform and clip-test hooks (x86.c) */
#endif
diff --git a/src/mesa/x86/x86_cliptest.S b/src/mesa/x86/x86_cliptest.S
new file mode 100644
index 0000000000..b1dd844b7b
--- /dev/null
+++ b/src/mesa/x86/x86_cliptest.S
@@ -0,0 +1,248 @@
+/* $Id: x86_cliptest.S,v 1.2 2000/10/23 00:16:28 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "assyntax.h"
+#include "clip_args.h"
+
+#define FP_ONE 1065353216 /* 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0; not referenced in this file */
+#define FP_ZERO 0 /* not referenced in this file */
+
+#define SRC(i) REGOFF(i * 4, ESI) /* 32-bit word i at ESI; i is not parenthesized, so pass a single token */
+#define DST(i) REGOFF(i * 4, EDI) /* 32-bit word i at EDI */
+#define MAT(i) REGOFF(i * 4, EDX) /* 32-bit word i at EDX; not referenced in this file */
+
+
+/*
+ * Lookup table for the clip test: 128 one-byte clip codes (16 rows of 8),
+ * indexed by the 7-bit value that gl_x86_cliptest_points4 accumulates in
+ * ECX for each vertex.
+ *
+ * Index bits:
+ *
+ * bit6 = SRC(3) < 0
+ * bit5 = SRC(2) < 0
+ * bit4 = abs(SRC(2)) > abs(SRC(3))
+ * bit3 = SRC(1) < 0
+ * bit2 = abs(SRC(1)) > abs(SRC(3))
+ * bit1 = SRC(0) < 0
+ * bit0 = abs(SRC(0)) > abs(SRC(3))
+ *
+ * Entry values:
+ *
+ * Rows 0-7 (SRC(3) >= 0) start from 0 and each out-of-range component
+ * adds one bit: SRC(0) adds 1 if positive or 2 if negative, SRC(1) adds
+ * 4 or 8, SRC(2) adds 32 or 16.
+ *
+ * Rows 8-15 (SRC(3) < 0) start from 63 (all six bits set) and each
+ * out-of-range component clears one bit: SRC(0) clears 2 if positive or
+ * 1 if negative, SRC(1) clears 8 or 4, SRC(2) clears 16 or 32.
+ */
+
+ SEG_DATA
+
+clip_table:
+ D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
+ D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
+ D_BYTE 32, 33, 32, 34, 36, 37, 36, 38
+ D_BYTE 32, 33, 32, 34, 40, 41, 40, 42
+ D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
+ D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
+ D_BYTE 16, 17, 16, 18, 20, 21, 20, 22
+ D_BYTE 16, 17, 16, 18, 24, 25, 24, 26
+ D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
+ D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
+ D_BYTE 47, 45, 47, 46, 39, 37, 39, 38
+ D_BYTE 47, 45, 47, 46, 43, 41, 43, 42
+ D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
+ D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
+ D_BYTE 31, 29, 31, 30, 23, 21, 23, 22
+ D_BYTE 31, 29, 31, 30, 27, 25, 27, 26
+
+
+ SEG_TEXT
+
+/*
+ * gl_x86_cliptest_points4
+ *
+ * For every 4-component vertex of the source vector: builds a 7-bit index
+ * from the signs of x, y, z, w and from magnitude comparisons of x, y, z
+ * against w, looks the clip code up in clip_table, stores it in
+ * clipmask[], ORs it into the or-mask and ANDs it into the and-mask.
+ * When the code is zero the vertex is projected: x/w, y/w, z/w and 1/w
+ * are written to the destination.  When the code is non-zero nothing is
+ * written to the destination slot.
+ *
+ * Arguments are read from the stack through the ARG_* macros of
+ * clip_args.h (ARG_SOURCE, ARG_DEST, ARG_CLIP, ARG_OR, ARG_AND).  The
+ * ARG_SOURCE and ARG_CLIP stack slots are overwritten with the source
+ * stride and the end-of-clipmask pointer while the loop runs.
+ *
+ * The destination vector header is updated: VEC_SIZE_4 is ORed into its
+ * flags, its size is set to 3 and its count to the source count.
+ * Destination vertices are written 16 bytes apart; source vertices are
+ * stepped by the source vector's stride.
+ *
+ * Returns ARG_DEST in EAX.
+ *
+ * NOTE(review): 1/w is computed for every vertex before its clip code is
+ * known and is discarded when the vertex is clipped, so the divide is
+ * issued even for w values that lead to clipping -- presumably the source
+ * of the FP overflow exceptions mentioned in x86.c; confirm.
+ *
+ * AL: ormask
+ * AH: andmask
+ * EBX: temp0
+ * ECX: temp1
+ * EDX: clipmask[]
+ * ESI: clip[]
+ * EDI: proj[]
+ * EBP: temp2
+ */
+
+#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC)
+#define ELFPIC
+#endif
+
+ALIGNTEXT16
+GLOBL GLNAME( gl_x86_cliptest_points4 )
+GLNAME( gl_x86_cliptest_points4 ):
+
+#ifdef ELFPIC
+#define FRAME_OFFSET 20
+#else
+#define FRAME_OFFSET 16
+#endif
+ PUSH_L( ESI )
+ PUSH_L( EDI )
+ PUSH_L( EBP )
+ PUSH_L( EBX ) /* four saved registers: 16 bytes */
+
+#ifdef ELFPIC
+ /* store pointer to clip_table on stack */
+ CALL( LLBL( ctp4_get_eip ) )
+ ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
+ MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
+ PUSH_L( EBX ) /* fifth push: hence FRAME_OFFSET 20 for ELFPIC */
+ JMP( LLBL( ctp4_clip_table_ready ) )
+
+LLBL( ctp4_get_eip ):
+ /* store eip in ebx */
+ MOV_L( REGIND(ESP), EBX )
+ RET
+
+LLBL( ctp4_clip_table_ready ):
+#endif
+
+ MOV_L( ARG_SOURCE, ESI )
+ MOV_L( ARG_DEST, EDI )
+
+ MOV_L( ARG_CLIP, EDX )
+ MOV_L( ARG_OR, EBX )
+
+ MOV_L( ARG_AND, EBP )
+ MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* eax = source stride */
+
+ MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ecx = source count */
+ MOV_L( REGOFF(V4F_START, ESI), ESI ) /* esi = first source vertex */
+
+ OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+ MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */
+
+ MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* NOTE(review): size = 3 although flags get VEC_SIZE_4 and DST(3) is written below -- confirm intended */
+ MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+
+ MOV_L( REGOFF(V4F_START, EDI), EDI ) /* edi = first destination vertex */
+ ADD_L( EDX, ECX )
+
+ MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */
+ CMP_L( ECX, EDX ) /* zero flag set when count == 0; the MOVs below leave flags alone */
+
+ MOV_B( REGIND(EBX), AL ) /* al = incoming or-mask */
+ MOV_B( REGIND(EBP), AH ) /* ah = incoming and-mask */
+
+ JZ( LLBL( ctp4_finish ) ) /* no vertices: masks are left as they were */
+
+ALIGNTEXT16
+LLBL( ctp4_top ):
+
+ FLD1 /* F3 */
+ FDIV_S( SRC(3) ) /* F3 = 1 / SRC(3), started before the clip code is known */
+
+ MOV_L( SRC(3), EBP )
+ MOV_L( SRC(2), EBX )
+
+ XOR_L( ECX, ECX )
+ ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
+
+ ADC_L( ECX, ECX )
+ ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
+
+ ADC_L( ECX, ECX )
+ CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
+
+ ADC_L( ECX, ECX )
+ MOV_L( SRC(1), EBX )
+
+ ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
+
+ ADC_L( ECX, ECX )
+ CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
+
+ ADC_L( ECX, ECX )
+ MOV_L( SRC(0), EBX )
+
+ ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
+
+ ADC_L( ECX, ECX )
+ CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
+
+ ADC_L( ECX, ECX ) /* ecx = 7-bit clip_table index */
+
+#ifdef ELFPIC
+ MOV_L( REGIND(ESP), EBP ) /* clip_table */
+
+ MOV_B( REGBI(EBP, ECX), CL )
+#else
+ MOV_B( REGOFF(clip_table,ECX), CL )
+#endif
+
+ OR_B( CL, AL )
+ AND_B( CL, AH )
+
+ TEST_B( CL, CL )
+ MOV_B( CL, REGIND(EDX) ) /* store this vertex's clip code */
+
+ JZ( LLBL( ctp4_proj ) ) /* code == 0: project the vertex */
+
+ FSTP( ST(0) ) /* */
+ JMP( LLBL( ctp4_next ) )
+
+LLBL( ctp4_proj ):
+
+ FLD_S( SRC(0) ) /* F0 F3 */
+ FMUL2( ST(1), ST(0) )
+
+ FLD_S( SRC(1) ) /* F1 F0 F3 */
+ FMUL2( ST(2), ST(0) )
+
+ FLD_S( SRC(2) ) /* F2 F1 F0 F3 */
+ FMUL2( ST(3), ST(0) )
+
+ FXCH( ST(2) ) /* F0 F1 F2 F3 */
+ FSTP_S( DST(0) ) /* F1 F2 F3 */
+ FSTP_S( DST(1) ) /* F2 F3 */
+ FSTP_S( DST(2) ) /* F3 */
+ FSTP_S( DST(3) ) /* */
+
+LLBL( ctp4_next ):
+
+ INC_L( EDX )
+ ADD_L( CONST(16), EDI ) /* destination vertices are 16 bytes apart */
+
+ ADD_L( ARG_SOURCE, ESI ) /* advance source by the stride saved in ARG_SOURCE */
+ CMP_L( EDX, ARG_CLIP )
+
+ JNZ( LLBL( ctp4_top ) )
+
+ MOV_L( ARG_OR, ECX )
+ MOV_L( ARG_AND, EDX )
+
+ MOV_B( AL, REGIND(ECX) ) /* write back accumulated or-mask */
+ MOV_B( AH, REGIND(EDX) ) /* write back accumulated and-mask */
+
+LLBL( ctp4_finish ):
+
+ MOV_L( ARG_DEST, EAX ) /* return value */
+#ifdef ELFPIC
+ POP_L( ESI ) /* discard ptr to clip_table */
+#endif
+ POP_L( EBX )
+ POP_L( EBP )
+ POP_L( EDI )
+ POP_L( ESI )
+
+ RET
diff --git a/src/mesa/x86/xform_args.h b/src/mesa/x86/xform_args.h
new file mode 100644
index 0000000000..b69f0b1b9c
--- /dev/null
+++ b/src/mesa/x86/xform_args.h
@@ -0,0 +1,74 @@
+/* $Id: xform_args.h,v 1.2 2000/10/23 00:16:29 gareth Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2000 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Transform function interface for assembly code. Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __XFORM_ARGS_H__
+#define __XFORM_ARGS_H__
+
+/* Offsets into GLvector4f
+ *
+ * Byte offsets of the GLvector4f fields, for use as displacements in
+ * assembly operands.
+ * NOTE(review): the values imply six consecutive 4-byte fields; confirm
+ * against the GLvector4f declaration.
+ */
+#define V4F_DATA 0
+#define V4F_START 4
+#define V4F_COUNT 8
+#define V4F_STRIDE 12
+#define V4F_SIZE 16
+#define V4F_FLAGS 20
+
+/* GLvector4f flags
+ *
+ * VEC_SIZE_n has the low n bits set.
+ */
+#define VEC_SIZE_1 1
+#define VEC_SIZE_2 3
+#define VEC_SIZE_3 7
+#define VEC_SIZE_4 15
+
+/* Offsets for transform_func arguments
+ *
+ * typedef void (*transform_func)( GLvector4f *to_vec,
+ * const GLfloat m[16],
+ * const GLvector4f *from_vec,
+ * const GLubyte *clipmask,
+ * const GLubyte flag );
+ *
+ * Offsets are in bytes, one 4-byte slot per argument in declaration
+ * order.  The first argument is at 4 rather than 0 -- presumably the
+ * slot at 0 holds the return address on entry; confirm against the
+ * calling convention selected by _ASMAPI.
+ *
+ * The ARG_* operands below are ESP-relative and add FRAME_OFFSET, which
+ * the including file must define.
+ */
+#define OFFSET_DEST 4
+#define OFFSET_MATRIX 8
+#define OFFSET_SOURCE 12
+#define OFFSET_CLIP 16
+#define OFFSET_FLAG 20
+
+#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
+#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
+#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP)
+
+#endif