Initial mach64 driver import for DRI mach64-0-0-6 branch,

this compiles but I doubt it works but it is a better starting point than the branch
author: Dave Airlie <airliedfreedesktop.org> 2004-02-05 22:45:00 +0000
committer: Dave Airlie <airliedfreedesktop.org> 2004-02-05 22:45:00 +0000
commit: 0fbeff2fa2e831e45e4dc6014c8f1e6abaa44aa1 (patch)
tree: 56dca07fce136806001a6147861a5b666fae46a4
parent: 493e6e1e900b3286c90db6dc1686162a9c869bd9 (diff)
30 files changed, 11715 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/mach64/Makefile.solo b/src/mesa/drivers/dri/mach64/Makefile.solo
new file mode 100644
index 0000000000..e3700f267a
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/Makefile.solo
@@ -0,0 +1,122 @@
+
+# Mesa 3-D graphics library
+# Version:  5.0
+# Copyright (C) 1995-2002  Brian Paul
+
+TOP = ../../../../..
+
+default: linux-solo
+
+SHARED_INCLUDES = $(INCLUDE_DIRS) -I. -I../common -Iserver
+MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini
+
+DEFINES += \
+	-D_HAVE_SWRAST=1 \
+	-D_HAVE_SWTNL=1 \
+	-D_HAVE_SANITY=1 \
+	-D_HAVE_CODEGEN=1 \
+	-D_HAVE_LIGHTING=1 \
+	-D_HAVE_TEXGEN=1 \
+	-D_HAVE_USERCLIP=1 \
+	-DGLX_DIRECT_RENDERING 
+
+# Not yet
+# MINIGLX_SOURCES = server/mach64_dri.c 
+
+DRIVER_SOURCES = \
+		mach64_context.c \
+		mach64_ioctl.c \
+		mach64_screen.c \
+		mach64_span.c \
+		mach64_state.c \
+		mach64_tex.c \
+		mach64_texmem.c \
+		mach64_texstate.c \
+		mach64_tris.c \
+		mach64_vb.c \
+		mach64_dd.c \
+		mach64_lock.c \
+		mach64_native_vb.c \
+		../../common/driverfuncs.c \
+                ../common/mm.c \
+                ../common/utils.c \
+                ../common/texmem.c \
+                ../common/vblank.c
+
+INCLUDES = $(MINIGLX_INCLUDES) \
+	   $(SHARED_INCLUDES)
+
+
+C_SOURCES = $(DRIVER_SOURCES) \
+	    $(MINIGLX_SOURCES) 
+
+MESA_MODULES = $(TOP)/src/mesa/mesa.a
+
+
+ifeq ($(WINDOW_SYSTEM),dri)
+WINOBJ=$(MESABUILDDIR)/dri/dri.a
+WINLIB=
+else
+WINOBJ=
+WINLIB=-L$(MESA)/src/glx/mini
+endif
+
+ASM_SOURCES = 
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) 
+
+### Include directories
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	-I$(TOP)/src/mesa/glapi \
+	-I$(TOP)/src/mesa/math \
+	-I$(TOP)/src/mesa/transform \
+	-I$(TOP)/src/mesa/swrast \
+	-I$(TOP)/src/mesa/swrast_setup
+
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES)  $< -o $@
+
+
+##### TARGETS #####
+
+targets: depend mach64_dri.so
+
+mach64_dri.so:  $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile.solo
+	rm -f $@ && $(CC) -o $@ -shared $(OBJECTS) $(MESA_MODULES) $(WINOBJ) $(WINLIB) -lc $(GL_LIB_DEPS)
+	rm -f $(TOP)/lib/mach64_dri.so && \
+	install mach64_dri.so $(TOP)/lib/mach64_dri.so
+
+$(TOP)/lib/mach64_dri.so:	mach64_dri.so
+	rm -f $(TOP)/lib/mach64_dri.so && \
+	install mach64_dri.so $(TOP)/lib/mach64_dri.so
+
+# Run 'make -f Makefile.solo depend' to refresh the dependencies when a source
+# file's includes change; the file is touched first as makedepend needs it.
+depend: $(C_SOURCES) $(ASM_SOURCES)
+	touch $@ && makedepend -f$@ -Y $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) \
+		$(C_SOURCES) $(ASM_SOURCES)
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+
+# Delete objects, editor backups and the built shared object
+clean:
+	-rm -f *.o */*.o *~ *.so
+
+
+include $(TOP)/Make-config
+
+include depend
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c
new file mode 100644
index 0000000000..4ebca8774a
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_context.c
@@ -0,0 +1,327 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "matrix.h"
+#include "extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "array_cache/acache.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_dd.h"
+#include "mach64_span.h"
+#include "mach64_state.h"
+#include "mach64_tex.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#ifndef MACH64_DEBUG
+int MACH64_DEBUG = (0);
+#endif
+
+static const struct dri_debug_control debug_control[] =
+{
+    { "sync",   DEBUG_ALWAYS_SYNC },
+    { "api",    DEBUG_VERBOSE_API },
+    { "msg",    DEBUG_VERBOSE_MSG },
+    { "lru",    DEBUG_VERBOSE_LRU },
+    { "dri",    DEBUG_VERBOSE_DRI },
+    { "ioctl",  DEBUG_VERBOSE_IOCTL },
+    { "prims",  DEBUG_VERBOSE_PRIMS },
+    { "count",  DEBUG_VERBOSE_COUNT },
+    { "nowait", DEBUG_NOWAIT },
+    { NULL,    0 }
+};
+
+static const char * const card_extensions[] =
+{
+   "GL_ARB_multitexture",
+   "GL_EXT_texture_edge_clamp",
+   "GL_MESA_ycbcr_texture",
+   "GL_SGIS_generate_mipmap",
+   "GL_SGIS_texture_edge_clamp",
+   NULL
+};
+
+
+/* Create the device specific context and attach it to driContextPriv.
+ * Returns GL_FALSE, leaking nothing, if the driver context, its vertex
+ * buffer or the Mesa context cannot be allocated. */
+GLboolean mach64CreateContext( const __GLcontextModes *glVisual,
+			       __DRIcontextPrivate *driContextPriv,
+                               void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   __DRIscreenPrivate *driScreen = driContextPriv->driScreenPriv;
+   struct dd_function_table functions;
+   mach64ContextPtr mmesa;
+   mach64ScreenPtr mach64Screen;
+   int i, heap;
+
+#if DO_DEBUG
+   MACH64_DEBUG = driParseDebugString(getenv("MACH64_DEBUG"), debug_control);
+#endif
+
+   /* Allocate the mach64 context */
+   mmesa = (mach64ContextPtr) CALLOC( sizeof(*mmesa) );
+   if ( !mmesa )
+      return GL_FALSE;
+
+   /* Allocate the vertex buffer early, while failure has little to undo */
+   mmesa->vert_buf = ALIGN_MALLOC(MACH64_BUFFER_SIZE, 32);
+   if ( !mmesa->vert_buf ) {
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   mmesa->vert_used = 0;
+   mmesa->vert_total = MACH64_BUFFER_SIZE;
+
+   /* Init default driver functions then plug in our Mach64-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   mach64InitDriverFuncs( &functions );
+   mach64InitIoctlFuncs( &functions );
+   mach64InitTextureFuncs( &functions );
+
+   /* Allocate the Mesa context */
+   shareCtx = sharedContextPrivate
+      ? ((mach64ContextPtr) sharedContextPrivate)->glCtx : NULL;
+   mmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+					&functions, (void *)mmesa);
+   if (!mmesa->glCtx) {
+      ALIGN_FREE(mmesa->vert_buf);
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = mmesa;
+   ctx = mmesa->glCtx;
+
+   mmesa->driContext = driContextPriv;
+   mmesa->driScreen = driScreen;
+   mmesa->driDrawable = NULL;
+   mmesa->hHWContext = driContextPriv->hHWContext;
+   mmesa->driHwLock = &driScreen->pSAREA->lock;
+   mmesa->driFd = driScreen->fd;
+
+   mach64Screen = mmesa->mach64Screen = (mach64ScreenPtr)driScreen->private;
+
+   mmesa->sarea = (ATISAREAPrivPtr)((char *)driScreen->pSAREA +
+				    sizeof(XF86DRISAREARec));
+
+   mmesa->CurrentTexObj[0] = NULL;
+   mmesa->CurrentTexObj[1] = NULL;
+
+   make_empty_list( &mmesa->SwappedOut );
+
+   mmesa->firstTexHeap = mach64Screen->firstTexHeap;
+   mmesa->lastTexHeap = mach64Screen->firstTexHeap + mach64Screen->numTexHeaps;
+
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+      make_empty_list( &mmesa->TexObjList[i] );
+      mmesa->texHeap[i] = mmInit( 0, mach64Screen->texSize[i] );
+      mmesa->lastTexAge[i] = -1;
+   }
+
+   mmesa->RenderIndex = -1;		/* Impossible value */
+   mmesa->num_verts = 0;
+   mmesa->new_state = MACH64_NEW_ALL;
+   mmesa->dirty = MACH64_UPLOAD_ALL;
+
+   /* Set the maximum texture size small enough that we can
+    * guarantee that both texture units can bind a maximal texture
+    * and have them both in memory (on-card or AGP) at once.
+    * Test for 2 textures * bytes/texel * size * size.  There's no
+    * need to account for mipmaps since we only upload one level.
+    */
+   heap = mach64Screen->IsPCI ? MACH64_CARD_HEAP : MACH64_AGP_HEAP;
+
+   if ( mach64Screen->texSize[heap] >= 2 * mach64Screen->cpp * 1024*1024 ) {
+      ctx->Const.MaxTextureLevels = 11; /* 1024x1024 */
+   } else if ( mach64Screen->texSize[heap] >= 2 * mach64Screen->cpp * 512*512 ) {
+      ctx->Const.MaxTextureLevels = 10; /* 512x512 */
+   } else {
+      ctx->Const.MaxTextureLevels = 9;  /* 256x256 */
+   }
+
+   ctx->Const.MaxTextureUnits = 2;
+
+#if ENABLE_PERF_BOXES
+   mmesa->boxes = ( getenv( "LIBGL_PERFORMANCE_BOXES" ) != NULL );
+#endif
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _ac_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+/*     _tnl_destroy_pipeline( ctx ); */
+/*     _tnl_install_pipeline( ctx, mach64_pipeline ); */
+
+   /* Configure swrast to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   mach64InitVB( ctx );
+   mach64InitTriFuncs( ctx );
+   mach64DDInitDriverFuncs( ctx );
+   mach64DDInitIoctlFuncs( ctx );
+   mach64DDInitStateFuncs( ctx );
+   mach64DDInitSpanFuncs( ctx );
+   mach64DDInitTextureFuncs( ctx );
+   mach64DDInitState( mmesa );
+
+   mmesa->do_irqs = (mmesa->mach64Screen->irq && !getenv("MACH64_NO_IRQS"));
+
+   mmesa->vblank_flags = (mmesa->do_irqs)
+      ? driGetDefaultVBlankFlags(&mmesa->optionCache) : VBLANK_FLAG_NO_IRQ;
+
+   driContextPriv->driverPrivate = (void *)mmesa;
+
+   return GL_TRUE;
+}
+
+/* Tear down the device specific context and release what it owns.
+ */
+void mach64DestroyContext( __DRIcontextPrivate *driContextPriv  )
+{
+   mach64ContextPtr mmesa = (mach64ContextPtr) driContextPriv->driverPrivate;
+
+   assert(mmesa);  /* should never be null */
+   if ( !mmesa )
+      return;
+
+   if (mmesa->glCtx->Shared->RefCount == 1) {
+      /* This share group is about to go away, so release our private
+       * texture object data and the texture heaps. */
+      mach64TexObjPtr t, next_t;
+      int i;
+
+      for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+         foreach_s ( t, next_t, &mmesa->TexObjList[i] ) {
+            mach64DestroyTexObj( mmesa, t );
+         }
+         mmDestroy( mmesa->texHeap[i] );
+         mmesa->texHeap[i] = NULL;
+      }
+
+      foreach_s ( t, next_t, &mmesa->SwappedOut ) {
+         mach64DestroyTexObj( mmesa, t );
+      }
+   }
+
+   _swsetup_DestroyContext( mmesa->glCtx );
+   _tnl_DestroyContext( mmesa->glCtx );
+   _ac_DestroyContext( mmesa->glCtx );
+   _swrast_DestroyContext( mmesa->glCtx );
+
+   mach64FreeVB( mmesa->glCtx );
+
+   /* Release the vertex buffer, if one was allocated */
+   if ( mmesa->vert_buf )
+      ALIGN_FREE( mmesa->vert_buf );
+
+   /* Detach ourselves from the Mesa context, then destroy it */
+   mmesa->glCtx->DriverCtx = NULL;
+   _mesa_destroy_context(mmesa->glCtx);
+
+   FREE( mmesa );
+}
+
+/* Make driContextPriv's context current on the given draw and read
+ * drawables; with a NULL driContextPriv no context is made current.
+ */
+GLboolean
+mach64MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                 __DRIdrawablePrivate *driDrawPriv,
+                 __DRIdrawablePrivate *driReadPriv )
+{
+   if ( !driContextPriv ) {
+      _mesa_make_current( 0, 0 );
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      mach64ContextPtr prev = ctx ? MACH64_CONTEXT(ctx) : NULL;
+      mach64ContextPtr mmesa = (mach64ContextPtr) driContextPriv->driverPrivate;
+
+      /* Coming from a different context: flag the full state for upload */
+      if ( mmesa != prev ) {
+	 mmesa->new_state |= MACH64_NEW_CONTEXT;
+	 mmesa->dirty = MACH64_UPLOAD_ALL;
+      }
+
+      if ( mmesa->driDrawable != driDrawPriv ) {
+	 mmesa->driDrawable = driDrawPriv;
+	 mach64CalcViewport( mmesa->glCtx );
+      }
+
+      _mesa_make_current2( mmesa->glCtx,
+                           (GLframebuffer *) driDrawPriv->driverPrivate,
+                           (GLframebuffer *) driReadPriv->driverPrivate );
+
+      mmesa->new_state |=  MACH64_NEW_CLIP;
+
+      if ( !mmesa->glCtx->Viewport.Width ) {
+	 _mesa_set_viewport(mmesa->glCtx, 0, 0,
+                            driDrawPriv->w, driDrawPriv->h);
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Unbind driContextPriv from its buffer; a no-op that returns GL_TRUE.
+ */
+GLboolean
+mach64UnbindContext( __DRIcontextPrivate *driContextPriv )
+{
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.h b/src/mesa/drivers/dri/mach64/mach64_context.h
new file mode 100644
index 0000000000..508b4875ce
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_context.h
@@ -0,0 +1,431 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_CONTEXT_H__
+#define __MACH64_CONTEXT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "dri_util.h"
+
+#include "xf86drm.h"
+#include "mach64_common.h"
+
+#include "mtypes.h"
+#include "mm.h"
+
+#include "mach64_reg.h"
+
+#include "texmem.h"
+
+struct mach64_context;
+typedef struct mach64_context mach64ContextRec;
+typedef struct mach64_context *mach64ContextPtr;
+
+#include "mach64_lock.h"
+#include "mach64_screen.h"
+
+/* Experimental driver options */
+#define MACH64_CLIENT_STATE_EMITS       0
+
+/* Performance monitoring */
+#define ENABLE_PERF_BOXES               1
+
+/* Native vertex format */
+#define MACH64_NATIVE_VTXFMT		1
+
+/* Flags for what context state needs to be updated:
+ */
+#define MACH64_NEW_ALPHA		0x0001
+#define MACH64_NEW_DEPTH		0x0002
+#define MACH64_NEW_FOG			0x0004
+#define MACH64_NEW_CLIP			0x0008
+#define MACH64_NEW_CULL			0x0010
+#define MACH64_NEW_MASKS		0x0020
+#define MACH64_NEW_RENDER_UNUSED	0x0040
+#define MACH64_NEW_WINDOW		0x0080
+#define MACH64_NEW_TEXTURE		0x0100
+#define MACH64_NEW_CONTEXT		0x0200
+#define MACH64_NEW_ALL			0x03ff
+
+/* Flags for software fallback cases:
+ */
+#define MACH64_FALLBACK_TEXTURE		0x0001
+#define MACH64_FALLBACK_DRAW_BUFFER	0x0002
+#define MACH64_FALLBACK_READ_BUFFER	0x0004
+#define MACH64_FALLBACK_STENCIL		0x0008
+#define MACH64_FALLBACK_RENDER_MODE	0x0010
+#define MACH64_FALLBACK_MULTIDRAW	0x0020
+#define MACH64_FALLBACK_LOGICOP		0x0040
+#define MACH64_FALLBACK_SEP_SPECULAR	0x0080
+#define MACH64_FALLBACK_BLEND_EQ	0x0100
+#define MACH64_FALLBACK_BLEND_FUNC	0x0200
+
+#if MACH64_NATIVE_VTXFMT
+
+/* The vertex structures.
+ */
+
+#if 0
+/* This isn't actually necessary since all accesses to the vertex
+ * structure must be made through the LE32_* macros.
+ */
+
+typedef struct {
+   GLubyte	blue;
+   GLubyte	green;
+   GLubyte	red;
+   GLubyte	alpha;
+} mach64_color_t;
+
+typedef struct {
+   GLfloat u1, v1, w1;			/* Texture 1 coordinates */
+   GLfloat u0, v0, w0;			/* Texture 0 coordinates */
+   mach64_color_t specular;		/* Specular color */
+   GLuint z;				/* Depth coordinate */
+   mach64_color_t color;		/* Diffuse color */
+   GLushort y, x;			/* Coordinates in screen space */
+} mach64_vertex;
+#endif
+
+/* The size of this union is not of relevance:
+ */
+union mach64_vertex_t {
+   GLfloat f[16];
+   GLuint ui[16];
+   GLushort us2[16][2];
+   GLubyte ub4[16][4];
+};
+
+typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr;
+
+#else
+
+/* Use the templated vertex format:
+ */
+#define TAG(x) mach64##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/* Subpixel offsets for window coordinates:
+ * These are enough to fix most glean tests except polygonOffset.
+ * There are also still some gaps that show in e.g. the tunnel Mesa demo
+ * or the lament xscreensaver hack.
+ */
+#define SUBPIXEL_X	(0.0125F)
+#define SUBPIXEL_Y	(0.15F)
+
+
+typedef void (*mach64_tri_func)( mach64ContextPtr,
+				   mach64Vertex *,
+				   mach64Vertex *,
+				   mach64Vertex * );
+
+typedef void (*mach64_line_func)( mach64ContextPtr,
+				    mach64Vertex *,
+				    mach64Vertex * );
+
+typedef void (*mach64_point_func)( mach64ContextPtr,
+				     mach64Vertex * );
+
+#if TEXMEM
+struct mach64_texture_object {
+   driTextureObject   base;
+
+   GLuint offset;
+
+   GLuint dirty;
+   GLuint age;
+
+   GLint widthLog2;
+   GLint heightLog2;
+   GLint maxLog2;
+
+   GLint hasAlpha;
+   GLint textureFormat;
+
+   /* Have to keep these separate due to how they are programmed.
+    * FIXME: Why don't we just use the tObj values?
+    */
+   GLboolean BilinearMin;
+   GLboolean BilinearMag;
+   GLboolean ClampS;
+   GLboolean ClampT;
+};
+#else
+struct mach64_texture_object {
+   struct mach64_texture_object *next;
+   struct mach64_texture_object *prev;
+   struct gl_texture_object *tObj;
+
+   PMemBlock memBlock;
+   GLuint offset;
+   GLuint size;
+
+   GLuint dirty;
+   GLuint age;
+
+   GLint bound;
+   GLint heap;
+
+   GLint widthLog2;
+   GLint heightLog2;
+   GLint maxLog2;
+
+   GLint hasAlpha;
+   GLint textureFormat;
+
+   /* Have to keep these separate due to how they are programmed.
+    * FIXME: Why don't we just use the tObj values?
+    */
+   GLboolean BilinearMin;
+   GLboolean BilinearMag;
+   GLboolean ClampS;
+   GLboolean ClampT;
+};
+#endif
+
+typedef struct mach64_texture_object mach64TexObj, *mach64TexObjPtr;
+
+
+struct mach64_context {
+   GLcontext *glCtx;
+
+   /* Driver and hardware state management
+    */
+   GLuint new_state;
+   GLuint dirty;			/* Hardware state to be updated */
+   mach64_context_regs_t setup;
+
+   GLuint NewGLState;
+   GLuint Fallback;
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLfloat hw_viewport[16];
+   GLfloat depth_scale;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   GLuint num_verts;
+   char *verts;
+
+   CARD32 Color;			/* Current draw color */
+   CARD32 ClearColor;			/* Color used to clear color buffer */
+   CARD32 ClearDepth;			/* Value used to clear depth buffer */
+
+   /* Map GL texture units onto hardware
+    */
+   GLint multitex;
+   GLint tmu_source[2];
+   GLint tex_dest[2];
+
+   /* Texture object bookkeeping
+    */
+   mach64TexObjPtr CurrentTexObj[2];
+#if TEXMEM
+   unsigned nr_heaps;
+   driTexHeap * texture_heaps[ MACH64_NR_TEX_HEAPS ];
+   driTextureObject swapped;
+#else
+   mach64TexObj TexObjList[MACH64_NR_TEX_HEAPS];
+   mach64TexObj SwappedOut;
+   memHeap_t *texHeap[MACH64_NR_TEX_HEAPS];
+   GLuint lastTexAge[MACH64_NR_TEX_HEAPS];
+   GLint firstTexHeap, lastTexHeap;
+#endif
+
+   /* Fallback rasterization functions
+    */
+   mach64_point_func draw_point;
+   mach64_line_func draw_line;
+   mach64_tri_func draw_tri;
+
+   /* Culling */
+   GLfloat backface_sign;
+
+   /* DMA buffers
+    */
+   void *vert_buf;
+   size_t vert_total;
+   unsigned vert_used;
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+
+   /* Visual, drawable, cliprect and scissor information
+    */
+   GLint drawOffset, drawPitch;
+   GLint drawX, drawY;                  /* origin of drawable in draw buffer */
+   GLint readOffset, readPitch;
+
+   GLuint numClipRects;			/* Cliprects for the draw buffer */
+   XF86DRIClipRectPtr pClipRects;
+
+   GLint scissor;
+   XF86DRIClipRectRec ScissorRect;	/* Current software scissor */
+
+   /* Mirrors of some DRI state
+    */
+   __DRIcontextPrivate	*driContext;	/* DRI context */
+   __DRIscreenPrivate	*driScreen;	/* DRI screen */
+   __DRIdrawablePrivate	*driDrawable;	/* DRI drawable bound to this ctx */
+
+   unsigned int lastStamp;		/* mirror driDrawable->lastStamp */
+
+   drmContext hHWContext;
+   drmLock *driHwLock;
+   int driFd;
+
+   mach64ScreenPtr mach64Screen;	/* Screen private DRI data */
+   ATISAREAPrivPtr sarea;		/* Private SAREA data */
+
+   GLuint hardwareWentIdle;
+
+#if ENABLE_PERF_BOXES
+   /* Performance counters
+    */
+   GLuint boxes;			/* Draw performance boxes */
+   GLuint c_clears;
+   GLuint c_drawWaits;
+   GLuint c_textureSwaps;
+   GLuint c_textureBytes;
+   GLuint c_agpTextureBytes;
+   GLuint c_texsrc_agp;
+   GLuint c_texsrc_card;
+   GLuint c_vertexBuffers;
+#endif
+
+   /* VBI
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+   GLuint do_irqs;
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+#define MACH64_CONTEXT(ctx)		((mach64ContextPtr)((ctx)->DriverCtx))
+
+
+extern GLboolean mach64CreateContext( const __GLcontextModes *glVisual,
+				      __DRIcontextPrivate *driContextPriv,
+                                      void *sharedContextPrivate );
+
+extern void mach64DestroyContext( __DRIcontextPrivate * );
+
+extern GLboolean mach64MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                                    __DRIdrawablePrivate *driDrawPriv,
+                                    __DRIdrawablePrivate *driReadPriv );
+
+extern GLboolean mach64UnbindContext( __DRIcontextPrivate *driContextPriv );
+
+/* ================================================================
+ * Byte ordering
+ */
+#include "X11/Xarch.h"
+
+#if X_BYTE_ORDER == X_LITTLE_ENDIAN
+#define LE32_IN( x )		( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )	( *(GLfloat *)(x) )
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = (y); } while (0)
+#define LE32_OUT_FLOAT( x, y )	do { *(GLfloat *)(x) = (y); } while (0)
+#else
+#include <byteswap.h>
+#define LE32_IN( x )		bswap_32( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )						\
+({									\
+   GLuint __tmp = bswap_32( *(GLuint *)(x) );				\
+   *(GLfloat *)&__tmp;							\
+})
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = bswap_32( y ); } while (0)
+#define LE32_OUT_FLOAT( x, y )						\
+do {									\
+   GLuint __tmp;							\
+   *(GLfloat *)&__tmp = (y);						\
+   *(GLuint *)(x) = bswap_32( __tmp );					\
+} while (0)
+#endif
+
+/* ================================================================
+ * DMA buffers
+ */
+
+#define DMALOCALS       CARD32 *buf=NULL; int requested=0; int outcount=0
+
+/* called while locked for interleaved client-side state emits */
+#define DMAGETPTR( dwords )					\
+do {								\
+   requested = (dwords);					\
+   buf = (CARD32 *)mach64AllocDmaLocked( mmesa, ((dwords)*4) );	\
+   outcount = 0;						\
+} while(0)
+
+#define DMAOUTREG( reg, val )				\
+do {							\
+   LE32_OUT( &buf[outcount++], ADRINDEX( reg ) );	\
+   LE32_OUT( &buf[outcount++], ( val ) );		\
+} while(0)
+
+#define DMAADVANCE()						\
+do {								\
+   if (outcount < requested) {					\
+      mmesa->vert_used -= (requested - outcount) * 4;	\
+   }								\
+} while(0)
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define DO_DEBUG		1
+
+#if DO_DEBUG
+extern int MACH64_DEBUG;
+#else
+#define MACH64_DEBUG		0
+#endif
+
+#define DEBUG_ALWAYS_SYNC	0x001
+#define DEBUG_VERBOSE_API	0x002
+#define DEBUG_VERBOSE_MSG	0x004
+#define DEBUG_VERBOSE_LRU	0x008
+#define DEBUG_VERBOSE_DRI	0x010
+#define DEBUG_VERBOSE_IOCTL	0x020
+#define DEBUG_VERBOSE_PRIMS	0x040
+#define DEBUG_VERBOSE_COUNT	0x080
+#define DEBUG_NOWAIT		0x100
+#endif
+#endif /* __MACH64_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_dd.c b/src/mesa/drivers/dri/mach64/mach64_dd.c
new file mode 100644
index 0000000000..82f4c740aa
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_dd.c
@@ -0,0 +1,134 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_dd.h"
+
+#include "context.h"
+#include "utils.h"
+
+#define DRIVER_DATE	"20030502"
+
+/* Return the size of the current context's drawable; `buffer' is unused.
+ */
+static void mach64DDGetBufferSize( GLframebuffer *buffer,
+				   GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   LOCK_HARDWARE( mmesa );
+   *width  = mmesa->driDrawable->w;
+   *height = mmesa->driDrawable->h;
+   UNLOCK_HARDWARE( mmesa );
+}
+
+/* Return the GL_VENDOR and GL_RENDERER strings for glGetString(); NULL is
+ * returned for any other name.  GL_RENDERER is built in a static buffer. */
+static const GLubyte *mach64DDGetString( GLcontext *ctx, GLenum name )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   static GLubyte buffer[128];
+   const char * card_name = "Mach64 [Rage Pro]";
+   GLuint agp_mode = mmesa->mach64Screen->IsPCI ? 0 :
+      mmesa->mach64Screen->AGPMode;
+
+   switch ( name ) {
+   case GL_VENDOR:
+      /* \351 is Latin-1 e-acute, escaped so re-encoding cannot mangle it */
+      return (GLubyte*)"Gareth Hughes, Leif Delgass, Jos\351 Fonseca";
+
+   case GL_RENDERER:
+      /* The value driGetRendererString() returns is not needed here */
+      (void) driGetRendererString( buffer, card_name, DRIVER_DATE,
+				   agp_mode );
+      return (GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/* Send all commands to the hardware.  If vertex buffers or indirect
+ * buffers are in use, then we need to make sure they are sent to the
+ * hardware.  All commands that are normally sent to the ring are
+ * already considered `flushed'.
+ */
+static void mach64DDFlush( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   LOCK_HARDWARE( mmesa );
+   FLUSH_DMA_LOCKED( mmesa );
+   UNLOCK_HARDWARE( mmesa );
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {
+      LOCK_HARDWARE( mmesa );
+      mach64PerformanceBoxesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+static void mach64DDFinish( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_drawWaits++;
+#endif
+
+   mach64DDFlush( ctx );
+   mach64WaitForIdle( mmesa );
+}
+
+/* Initialize the driver's misc functions.
+ */
+void mach64DDInitDriverFuncs( GLcontext *ctx )
+{
+   ctx->Driver.GetBufferSize	= mach64DDGetBufferSize;
+   ctx->Driver.ResizeBuffers    = _swrast_alloc_buffers;
+   ctx->Driver.GetString	= mach64DDGetString;
+   ctx->Driver.Finish		= mach64DDFinish;
+   ctx->Driver.Flush		= mach64DDFlush;
+   ctx->Driver.Error		= NULL;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_dd.h b/src/mesa/drivers/dri/mach64/mach64_dd.h
new file mode 100644
index 0000000000..de5649919f
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_dd.h
@@ -0,0 +1,39 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_DD_H__
+#define __MACH64_DD_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void mach64DDInitDriverFuncs( GLcontext *ctx );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.c b/src/mesa/drivers/dri/mach64/mach64_ioctl.c
new file mode 100644
index 0000000000..06bb9c8f1a
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.c
@@ -0,0 +1,911 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tex.h"
+
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast/swrast.h"
+
+#include "vblank.h"
+
+#define MACH64_TIMEOUT        10 /* the DRM already has a timeout, so keep this small */
+
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+/* Request one MACH64_BUFFER_SIZE vertex buffer from the DRM, retrying up to
+ * MACH64_TIMEOUT times; issues DRM_MACH64_RESET and exits if none is granted. */
+drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa )
+{
+   const int drmFd = mmesa->mach64Screen->driScreen->fd;
+   int bufIndex = 0;
+   int bufSize = 0;
+   int attempt;
+   drmDMAReq req;
+
+   /* Send nothing; ask for exactly one buffer back. */
+   req.context = mmesa->hHWContext;
+   req.send_count = 0;
+   req.send_list = NULL;
+   req.send_sizes = NULL;
+   req.flags = 0;
+   req.request_count = 1;
+   req.request_size = MACH64_BUFFER_SIZE;
+   req.request_list = &bufIndex;
+   req.request_sizes = &bufSize;
+   req.granted_count = 0;
+
+   for ( attempt = 0 ; attempt < MACH64_TIMEOUT ; attempt++ ) {
+      drmBufPtr granted;
+
+      if ( drmDMA( drmFd, &req ) != 0 )
+	 continue;
+
+      /* request_list points at bufIndex, which selects the entry in the screen's buffer list. */
+      granted = &mmesa->mach64Screen->buffers->list[bufIndex];
+      granted->used = 0;
+#if ENABLE_PERF_BOXES
+      /* Bump the performance counter */
+      mmesa->c_vertexBuffers++;
+#endif
+      return granted;
+   }
+
+   /* Every attempt failed: issue DRM_MACH64_RESET, drop the lock and bail out. */
+   drmCommandNone( drmFd, DRM_MACH64_RESET );
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "Error: Could not get new VB... exiting\n" );
+   exit( -1 );
+
+   return NULL;
+}
+/* Submit the vert_used bytes queued in vert_buf via DRM_MACH64_VERTEX, once per batch of cliprects. */
+void mach64FlushVerticesLocked( mach64ContextPtr mmesa )
+{
+   XF86DRIClipRectPtr pbox = mmesa->pClipRects;
+   int nbox = mmesa->numClipRects;
+   void *buffer = mmesa->vert_buf;
+   int count = mmesa->vert_used;
+   int prim = mmesa->hw_primitive;
+   int fd = mmesa->driScreen->fd;
+   drmMach64Vertex vertex;
+   int i, ret;
+   /* Reset the fill counters up front; count keeps the old vert_used. */
+   mmesa->num_verts = 0;
+   mmesa->vert_used = 0;
+
+   if ( !count )
+      return;
+   /* Let mach64EmitHwStateLocked handle any dirty bits other than cliprects. */
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS )
+      mach64EmitHwStateLocked( mmesa );
+   /* No cliprects: count drops to 0, yet the ioctl below is still issued. */
+   if ( !nbox )
+      count = 0;
+   /* More than MACH64_NR_SAREA_CLIPRECTS boxes: force the batched path below. */
+   if ( nbox > MACH64_NR_SAREA_CLIPRECTS )
+      mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+
+   if ( !count || !(mmesa->dirty & MACH64_UPLOAD_CLIPRECTS) ) {
+      /* FIXME: Is this really necessary */
+      if ( nbox == 1 )
+	 mmesa->sarea->nbox = 0;
+      else
+	 mmesa->sarea->nbox = nbox;
+      /* Single submission with the discard flag set. */
+      vertex.prim = prim;
+      vertex.buf = buffer;
+      vertex.used = count;
+      vertex.discard = 1;
+      ret = drmCommandWrite( fd, DRM_MACH64_VERTEX, &vertex, sizeof(drmMach64Vertex) );
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "Error flushing vertex buffer: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   } else {
+      /* Resubmit the same buffer once per batch of at most MACH64_NR_SAREA_CLIPRECTS boxes. */
+      for ( i = 0 ; i < nbox ; ) {
+	 int nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS, nbox );
+	 XF86DRIClipRectPtr b = mmesa->sarea->boxes;
+	 int discard = 0;
+	 /* Copy this batch of cliprects into the SAREA; the inner loop advances i to nr. */
+	 mmesa->sarea->nbox = nr - i;
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = pbox[i];
+	 }
+
+	 /* Finished with the buffer?
+	  */
+	 if ( nr == nbox ) {
+	    discard = 1;
+	 }
+
+	 mmesa->sarea->dirty |= MACH64_UPLOAD_CLIPRECTS;
+	 
+	 vertex.prim = prim;
+	 vertex.buf = buffer;
+	 vertex.used = count;
+	 vertex.discard = discard;
+	 ret = drmCommandWrite( fd, DRM_MACH64_VERTEX, &vertex, sizeof(drmMach64Vertex) );
+	 if ( ret ) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "Error flushing vertex buffer: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+   }
+   /* Either path leaves the context's cliprect dirty bit cleared. */
+   mmesa->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* ================================================================
+ * Texture uploads
+ */
+
+void mach64FireBlitLocked( mach64ContextPtr mmesa, drmBufPtr buffer,
+			   GLint offset, GLint pitch, GLint format,
+			   GLint x, GLint y, GLint width, GLint height )
+{
+   /* Pack the arguments into a drmMach64Blit and hand it to DRM_MACH64_BLIT. */
+   drmMach64Blit req;
+   GLint status;
+
+   req.idx = buffer->idx;
+   req.offset = offset;
+   req.pitch = pitch;
+   req.format = format;
+   req.x = x;
+   req.y = y;
+   req.width = width;
+   req.height = height;
+
+   status = drmCommandWrite( mmesa->driFd, DRM_MACH64_BLIT, &req, sizeof(req) );
+   if ( status == 0 )
+      return;
+   /* The ioctl failed: release the hardware lock, report and exit. */
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "DRM_MACH64_BLIT: return = %d\n", status );
+   exit( -1 );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+static void delay( void ) {
+/* Meant to keep an optimizing compiler from removing a spin loop.  NOTE(review): an empty static function may still be inlined away -- confirm. */
+}
+
+/* Throttle the frame rate -- block while MACH64_MAX_QUEUED_FRAMES or more
+ * swap buffers requests are pending according to sarea->frames_queued.
+ *
+ * GH: We probably don't want a timeout here, as we can wait as
+ * long as we want for a frame to complete.  If it never does, then
+ * the card has locked.
+ *
+ * Returns the number of times the queue depth was polled (0 when no wait
+ * was needed), or 1 right away if DEBUG_NOWAIT is set and the queue is full.
+ */
+static int mach64WaitForFrameCompletion( mach64ContextPtr mmesa )
+{
+   const int drmFd = mmesa->driFd;
+   int polls = 0;
+   int queued;
+
+   /* Loop until the queue depth read from the SAREA drops below the limit. */
+   while ( mmesa->sarea->frames_queued >= MACH64_MAX_QUEUED_FRAMES ) {
+      drmMach64GetParam query;
+      int status;
+      int spin;
+
+      /* Debug override: report one wait without polling. */
+      if ( MACH64_DEBUG & DEBUG_NOWAIT )
+	 return 1;
+
+      query.param = MACH64_PARAM_FRAMES_QUEUED;
+      query.value = &queued; /* also copied into sarea->frames_queued by DRM */
+
+      status = drmCommandWriteRead( drmFd, DRM_MACH64_GETPARAM,
+				    &query, sizeof(query) );
+
+      if ( status != 0 ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_GETPARAM: return = %d\n", status );
+	 exit( -1 );
+      }
+
+      /* Spin in place a bit so we aren't hammering the register */
+      polls++;
+
+      for ( spin = 0 ; spin < 1024 ; spin++ ) {
+	 delay();
+      }
+   }
+
+   return polls;
+}
+
+/* Copy the back color buffer to the front color buffer: throttle, wait for
+ * vblank, then issue DRM_MACH64_SWAP once per batch of front cliprects. */
+void mach64CopyBuffer( const __DRIdrawablePrivate *dPriv )
+{
+   mach64ContextPtr mmesa;
+   GLint nbox, i, ret;
+   XF86DRIClipRectPtr pbox;
+   GLboolean missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "\n********************************\n" );
+      fprintf( stderr, "\n%s( %p )\n\n",
+	       __FUNCTION__, mmesa->glCtx );
+      fflush( stderr );
+   }
+
+   /* Flush any outstanding vertex buffers */
+   FLUSH_BATCH( mmesa );
+
+   LOCK_HARDWARE( mmesa );
+
+   /* Throttle the frame rate -- wait until fewer than
+    * MACH64_MAX_QUEUED_FRAMES swap buffers requests are pending.
+    */
+   if ( !mach64WaitForFrameCompletion( mmesa ) ) {	/* returned 0: no wait was needed */
+      mmesa->hardwareWentIdle = 1;
+   } else {
+      mmesa->hardwareWentIdle = 0;
+   }
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {
+      mach64PerformanceBoxesLocked( mmesa );
+   }
+#endif
+   /* The hardware lock is dropped for the duration of the vblank wait. */
+   UNLOCK_HARDWARE( mmesa );
+   driWaitForVBlank( dPriv, &mmesa->vbl_seq, mmesa->vblank_flags, &missed_target );
+   LOCK_HARDWARE( mmesa );
+
+   /* use front buffer cliprects */
+   nbox = dPriv->numClipRects;
+   pbox = dPriv->pClipRects;
+
+   for ( i = 0 ; i < nbox ; ) {
+      GLint nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS , nbox );
+      XF86DRIClipRectPtr b = mmesa->sarea->boxes;
+      GLint n = 0;
+      /* Copy the next batch of cliprects into the SAREA; i advances to nr. */
+      for ( ; i < nr ; i++ ) {
+	 *b++ = pbox[i];
+	 n++;
+      }
+      mmesa->sarea->nbox = n;
+      /* One swap ioctl per batch. */
+      ret = drmCommandNone( mmesa->driFd, DRM_MACH64_SWAP );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_SWAP: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   if ( MACH64_DEBUG & DEBUG_ALWAYS_SYNC ) {
+      mach64WaitForIdleLocked( mmesa );
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+   /* Flag context, misc and cliprect state as dirty after the swap. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+		    MACH64_UPLOAD_MISC |
+		    MACH64_UPLOAD_CLIPRECTS);
+
+#if ENABLE_PERF_BOXES
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+#if ENABLE_PERF_BOXES
+/* ================================================================
+ * Performance monitoring
+ */
+
+void mach64PerformanceCounters( mach64ContextPtr mmesa )
+{
+   /* Print (only with DEBUG_VERBOSE_COUNT) and then zero the counters. */
+   const int verbose = ( MACH64_DEBUG & DEBUG_VERBOSE_COUNT ) != 0;
+
+   if ( verbose ) {
+      /* report performance counters */
+      fprintf( stderr, "mach64CopyBuffer: vertexBuffers:%i drawWaits:%i clears:%i\n",
+	       mmesa->c_vertexBuffers, mmesa->c_drawWaits, mmesa->c_clears );
+
+      /* the texture line appears only when a texture counter is non-zero */
+      if ( mmesa->c_textureSwaps || mmesa->c_textureBytes || mmesa->c_agpTextureBytes ) {
+	 fprintf( stderr, "    textureSwaps:%i  textureBytes:%i agpTextureBytes:%i\n",
+		  mmesa->c_textureSwaps, mmesa->c_textureBytes, mmesa->c_agpTextureBytes );
+      }
+
+      fprintf( stderr, "---------------------------------------------------------\n" );
+   }
+
+   mmesa->c_vertexBuffers = 0;
+   mmesa->c_drawWaits = 0;
+   mmesa->c_clears = 0;
+
+   mmesa->c_textureSwaps = 0;
+   mmesa->c_textureBytes = 0;
+   mmesa->c_agpTextureBytes = 0;
+   mmesa->c_texsrc_agp = 0;
+   mmesa->c_texsrc_card = 0;
+}
+
+/* Draw small diagnostic boxes with DRM_MACH64_CLEAR on the back buffer, one per active counter. */
+void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa )
+{
+   GLint ret;
+   drmMach64Clear clear;
+   GLint x, y, w, h;
+   GLuint color;
+   GLint nbox;
+   GLint x1, y1, x2, y2;
+   XF86DRIClipRectPtr b = mmesa->sarea->boxes;
+
+   /* save the SAREA box count and first cliprect; only b[0] is overwritten below */
+   nbox = mmesa->sarea->nbox;
+   x1 = b[0].x1;
+   y1 = b[0].y1;
+   x2 = b[0].x2;
+   y2 = b[0].y2;
+ 
+   /* setup a single cliprect and call the clear ioctl for each box */
+   mmesa->sarea->nbox = 1;
+
+   w = h = 8;
+   x = mmesa->drawX;
+   y = mmesa->drawY;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+   b[0].y1 = y;
+   b[0].y2 = y + h;
+   /* Every clear below targets the back buffer only. */
+   clear.flags = MACH64_BACK;
+   clear.clear_depth = 0;
+
+   /* Red box if DDFinish was called to wait for rendering to complete */
+   if ( mmesa->c_drawWaits ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 0, 0 );
+      
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   }
+   /* Move on to the next 8-pixel slot whether or not a box was drawn. */
+   x += w;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+
+   /* draw a green box if we had to wait for previous frame(s) to complete */
+   if ( !mmesa->hardwareWentIdle ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 255, 0, 0 );
+      
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   }
+   /* Next slot is 20 pixels wide and holds the AGP/card ratio bar. */
+   x += w;
+   w = 20;
+   b[0].x1 = x;
+
+   /* show approx. ratio of AGP/card textures used - Blue = AGP, Purple = Card */
+   if ( mmesa->c_texsrc_agp || mmesa->c_texsrc_card ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 );
+      w = ((GLfloat)mmesa->c_texsrc_agp / (GLfloat)(mmesa->c_texsrc_agp + mmesa->c_texsrc_card))*20;
+      if (w > 1) {
+	 /* w is the AGP share of the 20-pixel bar; shares of 1 pixel or less are not drawn. */
+	 b[0].x2 = x + w;
+
+	 clear.x = x;
+	 clear.y = y;
+	 clear.w = w;
+	 clear.h = h;
+	 clear.clear_color = color;
+
+	 ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+	 if (ret < 0) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+      /* The rest of the 20 pixels is the card share. */
+      x += w;
+      w = 20 - w;
+
+      if (w > 1) {
+	 b[0].x1 = x;
+	 b[0].x2 = x + w;
+
+	 color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 );
+
+	 clear.x = x;
+	 clear.y = y;
+	 clear.w = w;
+	 clear.h = h;
+	 clear.clear_color = color;
+
+	 ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+	 if (ret < 0) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+   }  
+   /* On both paths x has now advanced 20 pixels past the start of the ratio slot. */
+   x += w;
+   w = 8;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+
+   /* Yellow box if we swapped textures */
+   if ( mmesa->c_textureSwaps ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 255, 0, 0 );
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+      
+   }
+   /* The two byte-count bars are 4 pixels high and start after the yellow slot. */
+   h = 4;
+   x += 8;
+   b[0].x1 = x;
+   b[0].y2 = y + h;
+
+   /* Purple bar for card memory texture blits/uploads */
+   if ( mmesa->c_textureBytes ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 );
+      w = mmesa->c_textureBytes / 16384;	/* one pixel per 16384 bytes, at least 1 */
+      if ( w <= 0 ) 
+	 w = 1; 
+      if (w > (mmesa->driDrawable->w - 44))	/* 44 = 8+8+20+8 pixels already used to the left */
+	 w = mmesa->driDrawable->w - 44;
+
+      b[0].x2 = x + w;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* Blue bar for AGP memory texture blits/uploads */
+   if ( mmesa->c_agpTextureBytes ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 );
+      w = mmesa->c_agpTextureBytes / 16384;	/* same scale and clamps as the purple bar */
+      if ( w <= 0 ) 
+	 w = 1; 
+      if (w > (mmesa->driDrawable->w - 44))
+	 w = mmesa->driDrawable->w - 44;
+
+      y += 4;
+      b[0].x2 = x + w;
+      b[0].y1 = y;
+      b[0].y2 = y + h;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* Pink bar for number of vertex buffers used */
+   if ( mmesa->c_vertexBuffers ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 196, 128, 128, 0 );
+
+      w = mmesa->c_vertexBuffers;	/* one pixel per vertex buffer, clamped to the drawable width */
+      if (w > (mmesa->driDrawable->w))
+	 w = mmesa->driDrawable->w;
+      /* This bar starts back at drawX, at drawY + 8. */
+      h = 8;
+      x = mmesa->drawX;
+      y = mmesa->drawY + 8;
+      b[0].x1 = x;
+      b[0].x2 = x + w;
+      b[0].y1 = y;
+      b[0].y2 = y + h;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drmMach64Clear) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* restore cliprects */
+   mmesa->sarea->nbox = nbox;
+   b[0].x1 = x1;
+   b[0].y1 = y1;
+   b[0].x2 = x2;
+   b[0].y2 = y2;
+
+}
+
+#endif
+
+/* ================================================================
+ * Buffer clear
+ */
+/* Clear front/back/depth with DRM_MACH64_CLEAR; any other bits left in mask go to _swrast_Clear. */
+static void mach64DDClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
+			   GLint cx, GLint cy, GLint cw, GLint ch )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   drmMach64Clear clear;
+   GLuint flags = 0;
+   GLint i;
+   GLint ret;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s: all=%d %d,%d %dx%d\n",
+	       __FUNCTION__, all, cx, cy, cw, ch );
+   }
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_clears++;
+#endif
+
+   FLUSH_BATCH( mmesa );
+
+   /* The only state changes we care about here are the RGBA colormask
+    * and scissor/clipping.  We'll just update that state, if needed.
+    */
+   if ( mmesa->new_state & (MACH64_NEW_MASKS | MACH64_NEW_CLIP) ) {
+      const GLuint save_state = mmesa->new_state;
+      mmesa->new_state &= (MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+      mach64DDUpdateHWState( ctx );
+      mmesa->new_state = save_state & ~(MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+   }
+   /* Move each buffer the hardware path handles from mask into flags. */
+   if ( mask & DD_FRONT_LEFT_BIT ) {
+      flags |= MACH64_FRONT;
+      mask &= ~DD_FRONT_LEFT_BIT;
+   }
+
+   if ( mask & DD_BACK_LEFT_BIT ) {
+      flags |= MACH64_BACK;
+      mask &= ~DD_BACK_LEFT_BIT;
+   }
+   /* Depth is only claimed when ctx->Depth.Mask is set; otherwise the bit stays in mask. */
+   if ( ( mask & DD_DEPTH_BIT ) && ctx->Depth.Mask ) {
+      flags |= MACH64_DEPTH;
+      mask &= ~DD_DEPTH_BIT;
+   }
+
+   if ( mask )
+      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
+   /* Nothing left for the hardware path. */
+   if ( !flags )
+      return;
+
+   LOCK_HARDWARE( mmesa );
+
+   /* This needs to be in the locked region, so updated drawable origin is used */
+   /* Flip top to bottom */
+   cx += mmesa->drawX;
+   cy  = mmesa->drawY + dPriv->h - cy - ch;
+
+   /* HACK?
+    */
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   /* One clear ioctl per batch of at most MACH64_NR_SAREA_CLIPRECTS boxes. */
+   for ( i = 0 ; i < mmesa->numClipRects ; ) {
+      int nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS, mmesa->numClipRects );
+      XF86DRIClipRectPtr box = mmesa->pClipRects;
+      XF86DRIClipRectPtr b = mmesa->sarea->boxes;
+      GLint n = 0;
+      /* Partial clear: intersect each cliprect with the clear rectangle and drop empty results. */
+      if ( !all ) {
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 ) continue;
+	    if ( h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
+	 }
+      } else {
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
+	 }
+      }
+      /* NOTE(review): the ioctl below is still issued when n == 0 -- confirm the DRM treats that as a no-op. */
+      mmesa->sarea->nbox = n;
+
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL ) {
+	 fprintf( stderr,
+		  "DRM_MACH64_CLEAR: flag 0x%x color %x depth %x nbox %d\n",
+		  flags,
+		  (GLuint)mmesa->ClearColor,
+		  (GLuint)mmesa->ClearDepth,
+		  mmesa->sarea->nbox );
+      }
+
+      clear.flags = flags;
+      clear.x = cx;
+      clear.y = cy;
+      clear.w = cw;
+      clear.h = ch;
+      clear.clear_color = mmesa->ClearColor;
+      clear.clear_depth = mmesa->ClearDepth;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drmMach64Clear) );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+   /* Flag context, misc and cliprect state as dirty after the clear. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+		    MACH64_UPLOAD_MISC |
+		    MACH64_UPLOAD_CLIPRECTS);
+
+}
+
+
+void mach64WaitForIdleLocked( mach64ContextPtr mmesa )
+{
+   const int drmFd = mmesa->driFd;
+   int retries = 0;
+   int status = drmCommandNone( drmFd, DRM_MACH64_IDLE );
+
+   /* Keep polling while the DRM reports -EBUSY, up to MACH64_TIMEOUT retries. */
+   while ( ( status == -EBUSY ) && ( retries++ < MACH64_TIMEOUT ) )
+      status = drmCommandNone( drmFd, DRM_MACH64_IDLE );
+
+   if ( status >= 0 )
+      return;
+   drmCommandNone( drmFd, DRM_MACH64_RESET );	/* still failing: issue a reset before exiting */
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "Error: Mach64 timed out... exiting\n" );
+   exit( -1 );
+}
+
+/* Flush the DMA queue to the hardware.  On failure DRM_MACH64_RESET is
+ * issued and the process exits; on success context, misc and cliprect
+ * state are flagged dirty. */
+void mach64FlushDMALocked( mach64ContextPtr mmesa )
+{
+   const int drmFd = mmesa->driFd;
+
+   if ( drmCommandNone( drmFd, DRM_MACH64_FLUSH ) < 0 ) {
+      /* Flush failed: issue a reset, release the lock and give up. */
+      drmCommandNone( drmFd, DRM_MACH64_RESET );
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "Error flushing DMA... exiting\n" );
+      exit( -1 );
+   }
+
+   /* Reached only when the flush succeeded. */
+   mmesa->dirty |= ( MACH64_UPLOAD_CONTEXT
+		     | MACH64_UPLOAD_MISC
+		     | MACH64_UPLOAD_CLIPRECTS );
+}
+
+/* For client-side state emits - currently unused.  Writes each dirty register group from sarea->ContextState. */
+void mach64UploadHwStateLocked( mach64ContextPtr mmesa )
+{
+   ATISAREAPrivPtr sarea = mmesa->sarea;
+   /* dirty is a snapshot; bits are cleared in sarea->dirty as each group is emitted. */
+   mach64_context_regs_t *regs = &sarea->ContextState;
+   unsigned int dirty = sarea->dirty;
+   CARD32 offset = ((regs->tex_size_pitch & 0xf0) >> 2);	/* bits 4-7, shifted down by 2; added to MACH64_TEX_0_OFF below */
+
+   DMALOCALS;
+   /* Sized for all 19 register writes below; presumably 2 dwords each -- TODO confirm against DMAOUTREG. */
+   DMAGETPTR( 19*2 );
+
+   if ( dirty & MACH64_UPLOAD_MISC ) {
+      DMAOUTREG( MACH64_DP_MIX, regs->dp_mix );
+      DMAOUTREG( MACH64_DP_SRC, regs->dp_src );
+      DMAOUTREG( MACH64_CLR_CMP_CNTL, regs->clr_cmp_cntl );
+      DMAOUTREG( MACH64_GUI_TRAJ_CNTL, regs->gui_traj_cntl );
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_MISC;
+   }
+
+   if ( dirty & MACH64_UPLOAD_DST_OFF_PITCH ) {
+      DMAOUTREG( MACH64_DST_OFF_PITCH, regs->dst_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_DST_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_OFF_PITCH ) {
+      DMAOUTREG( MACH64_Z_OFF_PITCH, regs->z_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_ALPHA_CNTL ) {
+      DMAOUTREG( MACH64_Z_CNTL, regs->z_cntl );
+      DMAOUTREG( MACH64_ALPHA_TST_CNTL, regs->alpha_tst_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_SCALE_3D_CNTL ) {
+      DMAOUTREG( MACH64_SCALE_3D_CNTL, regs->scale_3d_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_FOG_CLR ) {
+      DMAOUTREG( MACH64_DP_FOG_CLR, regs->dp_fog_clr );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_WRITE_MASK ) {
+      DMAOUTREG( MACH64_DP_WRITE_MASK, regs->dp_write_mask );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_PIX_WIDTH ) {
+      DMAOUTREG( MACH64_DP_PIX_WIDTH, regs->dp_pix_width );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_PIX_WIDTH;
+   }
+   if ( dirty & MACH64_UPLOAD_SETUP_CNTL ) {
+      DMAOUTREG( MACH64_SETUP_CNTL, regs->setup_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SETUP_CNTL;
+   }
+
+   if ( dirty & MACH64_UPLOAD_TEXTURE ) {
+      DMAOUTREG( MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch );
+      DMAOUTREG( MACH64_TEX_CNTL, regs->tex_cntl );
+      DMAOUTREG( MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off );
+      DMAOUTREG( MACH64_TEX_0_OFF + offset, regs->tex_offset );
+      sarea->dirty &= ~MACH64_UPLOAD_TEXTURE;
+   }
+
+#if 0
+   if ( dirty & MACH64_UPLOAD_CLIPRECTS ) {
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+   }
+#endif
+   /* Clears every remaining bit too, including MACH64_UPLOAD_CLIPRECTS, which is not emitted above. */
+   sarea->dirty = 0;
+
+   DMAADVANCE();
+}
+
+void mach64DDInitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Clear = mach64DDClear;	/* the only driver hook installed from this file */
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
new file mode 100644
index 0000000000..519ee985eb
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
@@ -0,0 +1,156 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_IOCTL_H__
+#define __MACH64_IOCTL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "mach64_dri.h"
+#include "mach64_reg.h"
+#include "mach64_lock.h"
+
+#include "xf86drm.h"
+#include "mach64_common.h"
+
+#define MACH64_BUFFER_MAX_DWORDS	(MACH64_BUFFER_SIZE / sizeof(CARD32))
+
+
+extern drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa );
+extern void mach64FlushVerticesLocked( mach64ContextPtr mmesa );
+extern void mach64FlushDMALocked( mach64ContextPtr mmesa );
+extern void mach64UploadHwStateLocked( mach64ContextPtr mmesa );
+
+static __inline void *mach64AllocDmaLow( mach64ContextPtr mmesa, int bytes )
+{
+   char *slot;
+
+   /* Request does not fit behind vert_used: take the lock and flush first. */
+   if ( mmesa->vert_total < mmesa->vert_used + bytes ) {
+      LOCK_HARDWARE( mmesa );
+      mach64FlushVerticesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+
+   slot = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;	/* reserve the range just handed out */
+   return (void *)slot;
+}
+
+static __inline void *mach64AllocDmaLocked( mach64ContextPtr mmesa, int bytes )
+{
+   char *slot;
+
+   /* No locking here; presumably the caller already holds the hardware lock. */
+   if ( mmesa->vert_total < mmesa->vert_used + bytes )
+      mach64FlushVerticesLocked( mmesa );
+
+   slot = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;
+
+   return (void *)slot;
+}
+
+extern void mach64FireBlitLocked( mach64ContextPtr mmesa, drmBufPtr buffer,
+				  GLint offset, GLint pitch, GLint format,
+				  GLint x, GLint y, GLint width, GLint height );
+
+extern void mach64CopyBuffer( const __DRIdrawablePrivate *dPriv );
+#if ENABLE_PERF_BOXES
+extern void mach64PerformanceCounters( mach64ContextPtr mmesa );
+extern void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa );
+#endif
+extern void mach64WaitForIdleLocked( mach64ContextPtr mmesa );
+
+
+extern void mach64DDInitIoctlFuncs( GLcontext *ctx );
+
+
+/* ================================================================
+ * Helper macros:
+ */
+/* Flush queued vertices, if any, through mach64FlushVertices (which takes the hardware lock itself). */
+#define FLUSH_BATCH( mmesa )						\
+do {									\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_BATCH in %s\n", __FUNCTION__ );		\
+   if ( (mmesa)->vert_used ) {	/* parenthesized: argument may be an expression */ \
+      mach64FlushVertices( mmesa );					\
+   }									\
+} while (0)
+
+/* Flush queued vertices, wait for idle, then pre-read the framebuffer.
+ * According to a comment in ATIMach64Sync (atimach64.c) in the DDX:
+ * "For VTB's and later, the first CPU read of the framebuffer will return
+ * zeroes [...] This appears to be due to some kind of engine
+ * caching of framebuffer data I haven't found any way of disabling, or
+ * otherwise circumventing."
+ */
+#define FINISH_DMA_LOCKED( mmesa )					\
+do {									\
+   CARD32 _tmp;								\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FINISH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVerticesLocked( mmesa );				\
+   }									\
+   mach64WaitForIdleLocked( mmesa );					\
+   _tmp = *(volatile CARD32 *)(mmesa)->driScreen->pFB; /* pre-read framebuffer to counter caching problem */ \
+   (void) _tmp; /* value is irrelevant; avoids a set-but-unused warning */ \
+} while (0)
+/* Flush queued vertices, if any, then call mach64FlushDMALocked; no locking is done here. */
+#define FLUSH_DMA_LOCKED( mmesa )					\
+do {									\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {	/* parenthesized: argument may be an expression */ \
+      mach64FlushVerticesLocked( mmesa );				\
+   }									\
+   mach64FlushDMALocked( mmesa );					\
+} while (0)
+/* Statement macro: call mach64FlushVerticesLocked between LOCK_HARDWARE and UNLOCK_HARDWARE. */
+#define mach64FlushVertices( mmesa )					\
+do {									\
+   LOCK_HARDWARE( mmesa );						\
+   mach64FlushVerticesLocked( mmesa );					\
+   UNLOCK_HARDWARE( mmesa );						\
+} while (0)
+/* Statement macro: call mach64WaitForIdleLocked between LOCK_HARDWARE and UNLOCK_HARDWARE. */
+#define mach64WaitForIdle( mmesa )		\
+do {						\
+   LOCK_HARDWARE( mmesa );			\
+   mach64WaitForIdleLocked( mmesa );		\
+   UNLOCK_HARDWARE( mmesa );			\
+} while (0)
+
+
+#endif
+#endif /* __MACH64_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_lock.c b/src/mesa/drivers/dri/mach64/mach64_lock.c
new file mode 100644
index 0000000000..465bcb3ab8
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_lock.c
@@ -0,0 +1,93 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_lock.h"
+#include "mach64_tex.h"
+
+#if DEBUG_LOCKING
+char *prevLockFile = NULL;
+int   prevLockLine = 0;
+#endif
+
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void mach64GetLock( mach64ContextPtr mmesa, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   __DRIscreenPrivate *sPriv = mmesa->driScreen;
+   ATISAREAPrivPtr sarea = mmesa->sarea;
+   int i;
+   /* NOTE(review): the drmGetLock return value is not checked. */
+   drmGetLock( mmesa->driFd, mmesa->hHWContext, flags );
+
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv ); 
+   /* Drawable stamp changed: redo the cliprects for the current draw buffer, then the viewport. */
+   if ( mmesa->lastStamp != dPriv->lastStamp ) {
+      mmesa->lastStamp = dPriv->lastStamp;
+      if (mmesa->glCtx->Color._DrawDestMask == BACK_LEFT_BIT)
+         mach64SetCliprects( mmesa->glCtx, GL_BACK_LEFT );
+      else
+         mach64SetCliprects( mmesa->glCtx, GL_FRONT_LEFT );
+      mach64CalcViewport( mmesa->glCtx );
+   }
+   /* Context, misc and cliprect state are always flagged dirty on this path. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT
+		    | MACH64_UPLOAD_MISC
+		    | MACH64_UPLOAD_CLIPRECTS);
+   /* The SAREA names a different context as owner: claim it and mark everything dirty. */
+   if ( sarea->ctxOwner != mmesa->hHWContext ) {
+      sarea->ctxOwner = mmesa->hHWContext;
+      mmesa->dirty = MACH64_UPLOAD_ALL;
+   }
+   /* Age textures in every existing heap whose SAREA age differs from our last seen age. */
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+      if ( mmesa->texHeap[i] && (sarea->texAge[i] != mmesa->lastTexAge[i]) ) {
+	 mach64AgeTextures( mmesa, i );
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_lock.h b/src/mesa/drivers/dri/mach64/mach64_lock.h
new file mode 100644
index 0000000000..3a77c1c690
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_lock.h
@@ -0,0 +1,107 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_LOCK_H__
+#define __MACH64_LOCK_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void mach64GetLock( mach64ContextPtr mmesa, GLuint flags );
+
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ *
+ * When non-zero, the macros below record where the lock was last taken
+ * and abort the process if a lock is attempted while that record is
+ * still set.  When zero they expand to nothing.
+ */
+#define DEBUG_LOCKING	1
+
+#if DEBUG_LOCKING
+/* File and line of the most recent DEBUG_LOCK(); both are cleared by
+ * DEBUG_RESET().  Only declared here -- the definitions live elsewhere.
+ */
+extern char *prevLockFile;
+extern int   prevLockLine;
+
+/* Record the expansion site (__FILE__/__LINE__) as the current lock
+ * holder.
+ */
+#define DEBUG_LOCK()							\
+   do {									\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+
+/* Forget the recorded lock holder. */
+#define DEBUG_RESET()							\
+   do {									\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+/* If a lock holder is still recorded, print the previous and current
+ * locations to stderr and terminate with exit( 1 ).  Relies on the
+ * including file to provide the fprintf/exit declarations.
+ */
+#define DEBUG_CHECK_LOCK()						\
+   do {									\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );							\
+      }									\
+   } while (0)
+
+#else
+
+/* Lock debugging disabled: the hooks compile away. */
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that
+ * do not do any drawing !!!
+ */
+
+/* Lock the hardware and validate our state.
+ *
+ * DEBUG_CHECK_LOCK() runs first, then DRM_CAS is attempted on the
+ * context's hardware lock word.  Only when DRM_CAS leaves __ret
+ * non-zero is mach64GetLock() called (with flags 0) to obtain the lock
+ * and revalidate drawable/texture state.  DEBUG_LOCK() then records
+ * the call site.
+ *
+ * The argument is parenthesized at every use so that any pointer
+ * expression, not just a plain identifier, can be passed safely.
+ */
+#define LOCK_HARDWARE( mmesa )						\
+   do {									\
+      char __ret = 0;							\
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (mmesa)->driHwLock, (mmesa)->hHWContext,			\
+	       (DRM_LOCK_HELD | (mmesa)->hHWContext), __ret );		\
+      if ( __ret )							\
+	 mach64GetLock( (mmesa), 0 );					\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Unlock the hardware.
+ *
+ * Releases the lock with DRM_UNLOCK and then clears the lock-debugging
+ * record via DEBUG_RESET().  The argument is parenthesized at every
+ * use so that any pointer expression can be passed safely.
+ */
+#define UNLOCK_HARDWARE( mmesa )					\
+   do {									\
+      DRM_UNLOCK( (mmesa)->driFd,					\
+		  (mmesa)->driHwLock,					\
+		  (mmesa)->hHWContext );				\
+      DEBUG_RESET();							\
+   } while (0)
+
+#endif
+#endif /* __MACH64_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_native_vb.c b/src/mesa/drivers/dri/mach64/mach64_native_vb.c
new file mode 100644
index 0000000000..a348954721
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_native_vb.c
@@ -0,0 +1,305 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "math/m_translate.h"
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+/* Unpack one hardware-format vertex into a software-rasterizer vertex.
+ *
+ * The hardware vertex is a 10-dword record of which only the last
+ * mmesa->vertex_size dwords are present, so decoding starts
+ * (10 - vertex_size) dwords into the record.  The switch cases fall
+ * through on purpose: each larger format is the smaller one with more
+ * dwords in front of it.
+ *
+ * ctx: GL context (supplies the point size; LOCALVARS and the
+ *      GET_VERTEX_FORMAT/UNVIEWPORT macros presumably derive their
+ *      state from it as well -- confirm against the including file).
+ * src: hardware vertex to decode.
+ * dst: receives window coords, colors, fog, texcoords and point size.
+ *
+ * The packed color bytes are read by indexing rather than with
+ * "*((GLubyte *)p)++": a cast is not an lvalue in ISO C and current
+ * compilers reject that form.
+ */
+void TAG(translate_vertex)(GLcontext *ctx,
+			   const VERTEX *src,
+			   SWvertex *dst)
+{
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   UNVIEWPORT_VARS;
+   CARD32 *p = (CARD32 *)src + 10 - mmesa->vertex_size;
+
+   /* Default w; overwritten below when the format carries one. */
+   dst->win[3] = 1.0;
+   
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    /* Stored s,t were multiplied by the w at p[2]: undo that. */
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    dst->texcoord[1][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->texcoord[1][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->texcoord[1][0] = LE32_IN_FLOAT( p++ );
+	 dst->texcoord[1][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->texcoord[1][3] = 1.0;
+	 p++;					/* skip the secondary w */
+	 /* fall through */
+
+      case TEX0_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    dst->texcoord[0][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->texcoord[0][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->texcoord[0][0] = LE32_IN_FLOAT( p++ );
+	 dst->texcoord[0][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->texcoord[0][3] = 1.0;
+	 dst->win[3] = LE32_IN_FLOAT( p++ );
+	 /* fall through */
+	
+      case NOTEX_VERTEX_FORMAT:
+	 /* One dword: bytes are B, G, R of specular, then fog. */
+	 dst->specular[2] = ((GLubyte *)p)[0];
+	 dst->specular[1] = ((GLubyte *)p)[1];
+	 dst->specular[0] = ((GLubyte *)p)[2];
+	 dst->fog = ((GLubyte *)p)[3];
+	 p++;
+	 /* fall through */
+
+      case TINY_VERTEX_FORMAT:
+	 dst->win[2] = UNVIEWPORT_Z( LE32_IN( p++ ) );
+
+	 /* One dword: bytes are B, G, R, A of the primary color. */
+	 dst->color[2] = ((GLubyte *)p)[0];
+	 dst->color[1] = ((GLubyte *)p)[1];
+	 dst->color[0] = ((GLubyte *)p)[2];
+	 dst->color[3] = ((GLubyte *)p)[3];
+	 p++;
+	 
+	 {
+	    /* X lives in the high 16 bits, Y in the low 16, both signed. */
+	    GLuint xy = LE32_IN( p );
+	    
+	    dst->win[0] = UNVIEWPORT_X( (GLfloat)(GLshort)( xy >> 16 ) );
+	    dst->win[1] = UNVIEWPORT_Y( (GLfloat)(GLshort)( xy & 0xffff ) );
+	 }
+   }
+
+   /* p must now sit on the last (10th) dword of the record. */
+   assert( p + 1 - (CARD32 *)src == 10 );
+	 
+   dst->pointSize = ctx->Point._Size;
+}
+
+
+
+/* Dump one hardware-format vertex to stderr for debugging.
+ *
+ * Decoding starts (10 - mmesa->vertex_size) dwords into the 10-dword
+ * record, and the switch cases fall through on purpose so that every
+ * attribute present in the current vertex format is printed.
+ *
+ * ctx: GL context (not read directly here; LOCALVARS and
+ *      GET_VERTEX_FORMAT presumably use it -- confirm).
+ * v:   hardware vertex to print.
+ *
+ * Packed bytes are read by indexing instead of "*((GLubyte *)p)++"
+ * (a cast is not an lvalue in ISO C), and the texcoord temporaries are
+ * named so that they no longer shadow the parameter v.
+ */
+void TAG(print_vertex)( GLcontext *ctx, const VERTEX *v )
+{
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   CARD32 *p = (CARD32 *)v + 10 - mmesa->vertex_size;
+   
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+	 {
+	    GLfloat tu, tv, tw;
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    /* Stored s,t were multiplied by the w at p[2]: undo that. */
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    tu = rhw*LE32_IN_FLOAT( p++ );
+	    tv = rhw*LE32_IN_FLOAT( p++ );
+#else
+	    tu = LE32_IN_FLOAT( p++ );
+	    tv = LE32_IN_FLOAT( p++ );
+#endif
+	    tw = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u1 %f v1 %f w1 %f\n", tu, tv, tw );
+	 }
+	 /* fall through */
+
+      case TEX0_VERTEX_FORMAT:
+	 {
+	    GLfloat tu, tv, tw;
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );
+	    
+	    tu = rhw*LE32_IN_FLOAT( p++ );
+	    tv = rhw*LE32_IN_FLOAT( p++ );
+#else
+	    tu = LE32_IN_FLOAT( p++ );
+	    tv = LE32_IN_FLOAT( p++ );
+#endif
+	    tw = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u0 %f v0 %f w0 %f\n", tu, tv, tw );
+	 }
+	 /* fall through */
+	
+      case NOTEX_VERTEX_FORMAT:
+	 {
+	    /* One dword holding B, G, R, A bytes of the specular color. */
+	    const GLubyte *bytes = (const GLubyte *)p;
+	    GLubyte b = bytes[0];
+	    GLubyte g = bytes[1];
+	    GLubyte r = bytes[2];
+	    GLubyte a = bytes[3];
+
+	    p++;
+
+	    fprintf(stderr, "spec: r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+	 /* fall through */
+
+      case TINY_VERTEX_FORMAT:
+	 {
+	    GLuint xy;
+	    GLfloat x, y, z;
+	    GLubyte r, g, b, a;
+	    
+	    z = LE32_IN( p++ ) / 65536.0;
+
+	    /* One dword holding B, G, R, A bytes of the primary color. */
+	    b = ((GLubyte *)p)[0];
+	    g = ((GLubyte *)p)[1];
+	    r = ((GLubyte *)p)[2];
+	    a = ((GLubyte *)p)[3];
+	    p++;
+	 
+	    /* X in the high 16 bits, Y in the low 16; printed / 4.0. */
+	    xy = LE32_IN( p );
+	    x = (GLfloat)(GLshort)( xy >> 16 ) / 4.0;
+	    y = (GLfloat)(GLshort)( xy & 0xffff ) / 4.0;
+	    
+	    fprintf(stderr, "x %f y %f z %f\n", x, y, z);
+	    fprintf(stderr, "r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+   }
+   
+   /* p must now sit on the last (10th) dword of the record. */
+   assert( p + 1 - (CARD32 *)v == 10 );	 
+
+   fprintf(stderr, "\n");
+}
+
+/* Convert the client array 'from' into 4 x GLubyte elements stored in
+ * 'to', using _math_trans_4ub().
+ *
+ * VB:   vertex buffer; supplies the element count (Count) and the
+ *       capacity used for the first-time allocation (Size).
+ * to:   destination array; its storage is allocated on first use.
+ * from: source array; a zero StrideB means a single constant value.
+ */
+static void do_import( struct vertex_buffer *VB,
+		       struct gl_client_array *to,
+		       struct gl_client_array *from )
+{
+   GLuint count;
+
+   /* Lazily create the destination storage: VB->Size elements of four
+    * bytes each, 32-byte aligned.
+    */
+   if (to->Ptr == 0) {
+      to->Ptr = ALIGN_MALLOC( VB->Size * 4 * sizeof(GLubyte), 32 );
+      to->Type = GL_UNSIGNED_BYTE;
+   }
+
+   if (from->StrideB) {
+      /* Per-vertex data: convert every element, tightly packed. */
+      to->StrideB = 4 * sizeof(GLubyte);
+      count = VB->Count;
+   }
+   else {
+      /* No need to transform the same value 3000 times. */
+      to->StrideB = 0;
+      count = 1;
+   }
+
+   _math_trans_4ub( (GLubyte (*)[4]) to->Ptr,
+		    from->Ptr, from->StrideB,
+		    from->Type, from->Size,
+		    0, count );
+}
+
+#ifndef IMPORT_QUALIFIER
+#define IMPORT_QUALIFIER static
+#endif
+
+/* Convert the VB's primary color array (ColorPtr[0]) to 4 x GLubyte in
+ * the store returned by GET_UBYTE_COLOR_STORE(), then make the VB
+ * point at the converted copy.
+ */
+IMPORT_QUALIFIER void TAG(import_float_colors)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   struct gl_client_array *ubstore = GET_UBYTE_COLOR_STORE();
+
+   do_import( VB, ubstore, VB->ColorPtr[0] );
+
+   VB->ColorPtr[0] = ubstore;
+}
+
+/* Convert the VB's secondary color array (SecondaryColorPtr[0]) to
+ * 4 x GLubyte in the store returned by GET_UBYTE_SPEC_COLOR_STORE(),
+ * then make the VB point at the converted copy.
+ */
+IMPORT_QUALIFIER void TAG(import_float_spec_colors)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   struct gl_client_array *ubstore = GET_UBYTE_SPEC_COLOR_STORE();
+
+   do_import( VB, ubstore, VB->SecondaryColorPtr[0] );
+
+   VB->SecondaryColorPtr[0] = ubstore;
+}
+
+/* Interpolate the elements of the VB not included in typical hardware
+ * vertices.  
+ *
+ * NOTE: All these arrays are guaranteed by tnl to be writeable and
+ * have good stride.
+ */
+#ifndef INTERP_QUALIFIER 
+#define INTERP_QUALIFIER static
+#endif
+
+#define GET_COLOR(ptr, idx) ((ptr)->data[idx])
+
+
+/* Interpolate, at parameter t between vertices 'out' and 'in', the VB
+ * attributes that are not part of the hardware vertex, writing the
+ * result to vertex 'dst'; then interpolate the hardware vertex itself
+ * through INTERP_VERTEX.
+ *
+ * The index-1 color arrays handled here are presumably the back-face
+ * colors -- confirm against the tnl module.
+ */
+INTERP_QUALIFIER void TAG(interp_extras)( GLcontext *ctx,
+					  GLfloat t,
+					  GLuint dst, GLuint out, GLuint in,
+					  GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   const GLboolean haveColor1 = (VB->ColorPtr[1] != 0);
+   /* The secondary array is only considered when ColorPtr[1] exists. */
+   const GLboolean haveSpec1 = haveColor1 && (VB->SecondaryColorPtr[1] != 0);
+
+   if (haveColor1) {
+      INTERP_4CHAN( t,
+		    GET_COLOR(VB->ColorPtr[1], dst),
+		    GET_COLOR(VB->ColorPtr[1], out),
+		    GET_COLOR(VB->ColorPtr[1], in) );
+   }
+
+   if (haveSpec1) {
+      INTERP_3CHAN( t,
+		    GET_COLOR(VB->SecondaryColorPtr[1], dst),
+		    GET_COLOR(VB->SecondaryColorPtr[1], out),
+		    GET_COLOR(VB->SecondaryColorPtr[1], in) );
+   }
+
+   /* The new vertex takes the edge flag of 'out', or is forced to be a
+    * boundary edge when the caller asks for it.
+    */
+   if (VB->EdgeFlag) {
+      VB->EdgeFlag[dst] = VB->EdgeFlag[out] || force_boundary;
+   }
+
+   INTERP_VERTEX(ctx, t, dst, out, in, force_boundary);
+}
+
+/* Copy the provoking-vertex colors held outside the hardware vertex
+ * (the index-1 color arrays of the VB) from vertex 'src' to vertex
+ * 'dst', then copy the hardware vertex colors via COPY_PV_VERTEX.
+ */
+INTERP_QUALIFIER void TAG(copy_pv_extras)( GLcontext *ctx, 
+					   GLuint dst, GLuint src )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   const GLboolean haveColor1 = (VB->ColorPtr[1] != 0);
+   /* The secondary array is only considered when ColorPtr[1] exists. */
+   const GLboolean haveSpec1 = haveColor1 && (VB->SecondaryColorPtr[1] != 0);
+
+   if (haveColor1) {
+      COPY_CHAN4( GET_COLOR(VB->ColorPtr[1], dst),
+		  GET_COLOR(VB->ColorPtr[1], src) );
+   }
+
+   if (haveSpec1) {
+      COPY_CHAN4( GET_COLOR(VB->SecondaryColorPtr[1], dst),
+		  GET_COLOR(VB->SecondaryColorPtr[1], src) );
+   }
+
+   COPY_PV_VERTEX(ctx, dst, src);
+}
+
+
+#undef INTERP_QUALIFIER
+#undef IMPORT_QUALIFIER
+#undef GET_COLOR
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h b/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h
new file mode 100644
index 0000000000..b07cbcfd48
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h
@@ -0,0 +1,586 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1 q coordinates where possible.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ */
+
+/* Map a coordinate through the viewport matrix and convert it to the
+ * integer form stored in the hardware vertex.  All three macros read a
+ * local named 's' from the enclosing scope (the functions in this
+ * template set it from GET_VIEWPORT_MAT()).
+ *
+ * X and Y are scaled by 4.0 before truncation to GLint; Z is truncated
+ * to GLuint and then shifted left by 15 bits.
+ */
+#define VIEWPORT_X(x)  ((GLint) ((s[0]  * (x) + s[12]) * 4.0))
+#define VIEWPORT_Y(y)  ((GLint) ((s[5]  * (y) + s[13]) * 4.0))
+#define VIEWPORT_Z(z) (((GLuint) (s[10] * (z) + s[14])) << 15)
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+/* Write hardware vertices for VB elements [start, end) into 'dest',
+ * one vertex every 'stride' bytes.
+ *
+ * Each vertex is laid out as 10 dwords:
+ *    0-2  secondary texture S, T, W     (DO_TEX1)
+ *    3-5  primary texture S, T, W       (DO_TEX0)
+ *    6    specular B, G, R bytes (DO_SPEC) and fog in byte 3 (DO_FOG)
+ *    7    Z                             (DO_XYZW)
+ *    8    color B, G, R, A bytes        (DO_RGBA)
+ *    9    X in the high 16 bits, Y in the low 16 bits (DO_XYZW)
+ * Dwords belonging to attributes that are compiled out are skipped and
+ * left untouched.  For vertices with a non-zero clip mask the texture
+ * W is taken as 1.0 and neither Z nor X/Y is written.
+ *
+ * The color bytes are stored by indexing rather than with
+ * "*((GLubyte *)p)++ = ...": a cast is not an lvalue in ISO C and
+ * current compilers reject that form.
+ */
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest,
+		       GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+#if DO_TEX1
+   GLfloat (*tc1)[4];
+   GLuint tc1_stride;
+#if DO_PTEX
+   GLuint tc1_size;
+#endif
+#endif
+#if DO_TEX0
+   GLfloat (*tc0)[4];
+   GLuint tc0_stride;
+#if DO_PTEX
+   GLuint tc0_size;
+#endif
+#endif
+#if DO_SPEC
+   GLubyte (*spec)[4];
+   GLuint spec_stride;
+#endif
+#if DO_FOG
+   GLfloat (*fog)[4];
+   GLuint fog_stride;
+#endif
+#if DO_RGBA
+   GLubyte (*col)[4];
+   GLuint col_stride;
+#endif
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+   const GLubyte *mask = VB->ClipMask;
+#endif
+   int i;
+
+#if !DO_XYZW
+   (void) s; /* Quiet compiler */
+#endif
+/*     fprintf(stderr, "%s(big) importable %d %d..%d\n",  */
+/*  	   __FUNCTION__, VB->importable_data, start, end); */
+
+   /* Fetch base pointer and stride of every source array in use. */
+#if DO_TEX1
+   {
+      const GLuint t1 = GET_TEXSOURCE(1);
+      tc1 = VB->TexCoordPtr[t1]->data;
+      tc1_stride = VB->TexCoordPtr[t1]->stride;
+#if DO_PTEX
+      tc1_size = VB->TexCoordPtr[t1]->size;
+#endif
+   }
+#endif
+
+#if DO_TEX0
+   {
+      const GLuint t0 = GET_TEXSOURCE(0);
+      tc0 = VB->TexCoordPtr[t0]->data;
+      tc0_stride = VB->TexCoordPtr[t0]->stride;
+#if DO_PTEX
+      tc0_size = VB->TexCoordPtr[t0]->size;
+#endif
+   }
+#endif
+
+#if DO_SPEC
+   spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->data;
+   spec_stride = VB->SecondaryColorPtr[0]->stride;
+#endif
+
+#if DO_FOG
+   if (VB->FogCoordPtr) {
+      fog = VB->FogCoordPtr->data;
+      fog_stride = VB->FogCoordPtr->stride;
+   } else {
+      /* No fog array: use a constant zero with stride 0. */
+      static GLfloat tmp[4] = {0, 0, 0, 0};
+      fog = &tmp;
+      fog_stride = 0;
+   }
+#endif
+
+#if DO_RGBA
+   col = VB->ColorPtr[0]->data;
+   col_stride = VB->ColorPtr[0]->stride;
+#endif
+
+   coord = VB->NdcPtr->data;
+   coord_stride = VB->NdcPtr->stride;
+
+   /* Advance every source pointer to element 'start'. */
+   if (start) {
+#if DO_TEX1
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+#endif
+#if DO_TEX0
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+#endif
+#if DO_SPEC
+	 STRIDE_4UB(spec, start * spec_stride);
+#endif
+#if DO_FOG
+	 /*  STRIDE_F(fog, start * fog_stride); */
+	 fog =  (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride);
+#endif
+#if DO_RGBA
+	 STRIDE_4UB(col, start * col_stride);
+#endif
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+   }
+
+   for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+	 /* p walks the 10 dwords of the current output vertex. */
+	 CARD32 *p = (CARD32 *)v;
+#if DO_TEX1 || DO_TEX0
+	 GLfloat w;
+
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    w = coord[0][3];
+	 } else {
+	    /* clipped */
+	    w = 1.0;
+	 }
+#endif
+	 
+#if DO_TEX1
+#if DO_PTEX
+	 if (tc1_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );			/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );			/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );			/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc1[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );			/* VERTEX_?_SECONDARY_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );			/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );			/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc1[0][0] );			/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, tc1[0][1] );			/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_SECONDARY_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 +  tc1_stride);
+#else /* !DO_TEX1 */
+	 p += 3;
+#endif /* !DO_TEX1 */
+	    
+#if DO_TEX0
+#if DO_PTEX
+	 if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc0[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][0] );		/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][1] );		/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 +  tc0_stride);
+#else /* !DO_TEX0 */
+	 p += 3;
+#endif /* !DO_TEX0 */
+
+#if DO_SPEC
+	 ((GLubyte *)p)[0] = spec[0][2];			/* VERTEX_?_SPEC_B */
+	 ((GLubyte *)p)[1] = spec[0][1];			/* VERTEX_?_SPEC_G */
+	 ((GLubyte *)p)[2] = spec[0][0];			/* VERTEX_?_SPEC_R */
+	 STRIDE_4UB(spec, spec_stride);
+#endif
+#if DO_FOG
+	 ((GLubyte *)p)[3] = fog[0][0] * 255.0;			/* VERTEX_?_SPEC_A */
+	 fog =  (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+	 /*  STRIDE_F(fog, fog_stride); */
+#endif
+	 p++;
+	    
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p++, VIEWPORT_Z( coord[0][2] ) );		/* VERTEX_?_Z */
+	 } else {
+#endif
+	    p++;
+#if DO_XYZW
+	 }
+#endif
+
+	 /* The color dword is stored byte by byte, then stepped over
+	  * once, whether or not DO_RGBA is enabled.
+	  */
+#if DO_RGBA
+	 ((GLubyte *)p)[0] = col[0][2];				/* VERTEX_?_B */
+	 ((GLubyte *)p)[1] = col[0][1];				/* VERTEX_?_G */
+	 ((GLubyte *)p)[2] = col[0][0];				/* VERTEX_?_R */
+	 ((GLubyte *)p)[3] = col[0][3];				/* VERTEX_?_A */
+	 STRIDE_4UB(col, col_stride);
+#endif
+	 p++;
+
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p,
+		      (VIEWPORT_X( coord[0][0] ) << 16) |	/* VERTEX_?_X */
+		      (VIEWPORT_Y( coord[0][1] ) & 0xffff) );	/* VERTEX_?_Y */
+	    
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf( stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %x\n",
+			__FUNCTION__,
+			i,
+			(LE32_IN( p ) >> 16)/4.0,
+			(LE32_IN( p ) & 0xffff)/4.0,
+			LE32_IN( p - 2 )/65536.0,
+			*(GLuint *)(p - 1) );
+	    }
+	 }
+#endif
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);
+#endif
+	 
+	 /* p must now sit on the last (10th) dword of the vertex. */
+	 assert( p + 1 - (CARD32 *)v == 10 );
+      }
+}
+
+#if DO_XYZW && DO_RGBA
+
+/* Decide whether the current texcoord sizes can be emitted by this
+ * vertex setup.
+ *
+ * Returns GL_TRUE when they can, GL_FALSE when a 4-component
+ * (projective) texcoord array is bound that this setup cannot handle.
+ * PTEX_FALLBACK() is additionally invoked whenever the secondary unit
+ * is in use (DO_TEX1) and either unit has 4-component coordinates.
+ */
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Force 'missing' texcoords to something valid.
+    */
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0) {
+      VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+   }
+
+   if (!DO_PTEX) {
+      /* No hardware support for projective texture.  Can fake it for
+       * TEX0 only.
+       */
+      if (DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4) {
+	 PTEX_FALLBACK();
+	 return GL_FALSE;
+      }
+
+      if (DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4) {
+	 if (DO_TEX1) {
+	    PTEX_FALLBACK();
+	 }
+	 return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Build hardware vertex 'edst' by interpolating, at parameter t,
+ * between hardware vertices 'eout' and 'ein' (used when clipping).
+ *
+ * Texture coordinates and the packed color bytes are interpolated from
+ * the two source hardware vertices; Z and X/Y are recomputed from the
+ * clip-space position of 'edst' (VB->ClipPtr) divided by its w, where
+ * a zero w is treated as 1.  All three vertices use the 10-dword
+ * layout: 0-2 secondary S,T,W; 3-5 primary S,T,W; 6 specular B,G,R +
+ * fog; 7 Z; 8 color B,G,R,A; 9 X|Y.
+ *
+ * force_boundary is not used here.
+ *
+ * The packed bytes are addressed by index and the pointers advanced one
+ * whole dword at a time.  The previous "((GLubyte *)dst)++" form used a
+ * cast as an lvalue, which is not valid ISO C, and left 'in' and 'out'
+ * pointing into the middle of a dword.
+ */
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,
+			 GLuint edst, GLuint eout, GLuint ein,
+			 GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint shift = GET_VERTEX_STRIDE_SHIFT();
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+
+   CARD32 *dst = (CARD32 *)(ddverts + (edst << shift));
+   CARD32 *in  = (CARD32 *)(ddverts + (ein  << shift));
+   CARD32 *out = (CARD32 *)(ddverts + (eout << shift));
+
+   (void)s;
+
+   /* Reciprocal w of the new vertex; guard against division by zero. */
+   w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);
+
+#if DO_TEX1
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX1 */
+   dst += 3; out += 3; in += 3;
+#endif /* !DO_TEX1 */
+
+#if DO_TEX0
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX0 */
+   dst += 3; out += 3; in += 3;
+#endif /* !DO_TEX0 */
+   
+   /* Dword 6: specular B, G, R in bytes 0-2 and fog in byte 3.  Only
+    * the enabled bytes are written; the dword is always stepped over.
+    */
+#if DO_SPEC
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_SPEC_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_SPEC_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_SPEC_R */
+#endif /* DO_SPEC */
+
+#if DO_FOG
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_SPEC_A */
+#endif /* DO_FOG */
+
+   dst++; out++; in++;
+
+   LE32_OUT( dst, VIEWPORT_Z( dstclip[2] * w ) );		/* VERTEX_?_Z */
+   dst++; out++; in++;
+  
+   /* Dword 8: primary color bytes B, G, R, A. */
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_R */
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_A */
+
+   /* Only dst moves on: in and out are not read again and stay on the
+    * color dword, which is what the asserts below expect.
+    */
+   dst++;
+
+   LE32_OUT( dst,
+	     (VIEWPORT_X( dstclip[0] * w ) << 16) |		/* VERTEX_?_X */
+	     (VIEWPORT_Y( dstclip[1] * w ) & 0xffff) );		/* VERTEX_?_Y */
+
+   assert( dst + 1 - (CARD32 *)(ddverts + (edst << shift)) == 10 );
+   assert( in  + 2 - (CARD32 *)(ddverts + (ein  << shift)) == 10 );
+   assert( out + 2 - (CARD32 *)(ddverts + (eout << shift)) == 10 );
+
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %x\n",
+	       __FUNCTION__,
+	       (GLshort)(LE32_IN( dst ) >> 16)/4.0,
+	       (GLshort)(LE32_IN( dst ) & 0xffff)/4.0,
+	       LE32_IN( dst - 2 )/65536.0,
+	       *(GLuint *)(dst - 1) );
+   }
+}
+
+#endif /* DO_RGBA && DO_XYZW */
+
+
+/* Copy the provoking-vertex colors from hardware vertex 'esrc' to
+ * hardware vertex 'edst' inside the driver's vertex store.
+ *
+ * Dword 6 (specular + fog) is copied when DO_SPEC or DO_FOG is set and
+ * dword 8 (primary color) when DO_RGBA is set; with none of them
+ * enabled the function does nothing.
+ */
+static void TAG(copy_pv)( GLcontext *ctx, GLuint edst, GLuint esrc )
+{
+#if DO_SPEC || DO_FOG || DO_RGBA
+   LOCALVARS   
+   GLubyte *store = GET_VERTEX_STORE();
+   GLuint shift = GET_VERTEX_STRIDE_SHIFT();
+   GLuint *to = (GLuint *)(store + (edst << shift));
+   GLuint *from = (GLuint *)(store + (esrc << shift));
+
+#if DO_SPEC || DO_FOG
+   to[6] = from[6];			/* VERTEX_?_SPEC_ARGB */
+#endif
+
+#if DO_RGBA
+   to[8] = from[8];			/* VERTEX_?_ARGB */
+#endif
+#endif /* DO_SPEC || DO_FOG || DO_RGBA */
+}
+
+/* Register this template instance in setup_tab[IND]: its emit and
+ * copy_pv functions, the interp/check_tex_sizes functions when both
+ * DO_XYZW and DO_RGBA are enabled, and the vertex format, size (in
+ * dwords) and stride shift implied by the enabled attributes.
+ */
+static void TAG(init)( void )
+{
+   setup_tab[IND].emit = TAG(emit);
+   setup_tab[IND].copy_pv = TAG(copy_pv);
+
+#if DO_XYZW && DO_RGBA
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+   setup_tab[IND].interp = TAG(interp);
+#endif
+
+   /* vertex_stride_shift must be the same because each
+    * vertex is aligned with the end of the structure and 
+    * not the beginning
+    */
+   setup_tab[IND].vertex_stride_shift = 6;
+
+   /* The largest enabled attribute selects the format and its size. */
+#if DO_TEX1
+   setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 10;
+#elif DO_TEX0
+   setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 7;
+#elif DO_SPEC || DO_FOG
+   setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 4;
+#else
+   setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+   setup_tab[IND].vertex_size = 3;
+#endif
+
+   /* A vertex (size in dwords, 4 bytes each) must fit in one stride. */
+   assert(setup_tab[IND].vertex_size * 4 <=
+          1 << setup_tab[IND].vertex_stride_shift);
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/mach64_reg.h b/src/mesa/drivers/dri/mach64/mach64_reg.h
new file mode 100644
index 0000000000..2fd31de6c7
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_reg.h
@@ -0,0 +1,409 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_REG_H__
+#define __MACH64_REG_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+/*
+ * Not sure how this compares with the G200, but the Rage Pro has two
+ * banks of registers, with bank 0 at (aperture base + memmap offset - 1KB)
+ * and bank 1 at (aperture base + memmap offset - 2KB).  But, to send them
+ * via DMA, we need to encode them as memory map select rather than physical
+ * offsets.
+ */
+/* Byte-offset ranges of the two register banks as used by this header:
+ * bank 0 covers 0x0400-0x07ff, bank 1 covers 0x0000-0x03ff.
+ */
+#define DWMREG0		0x0400
+#define DWMREG0_END	0x07ff
+#define DWMREG1		0x0000
+#define DWMREG1_END	0x03ff
+
+/* ADRINDEX(r) converts a register byte offset into a dword index
+ * relative to the start of its bank; bank 1 registers additionally get
+ * bit 0x0100 set so the two banks stay distinguishable.
+ */
+#define ISREG0(r)	( ( (r) >= DWMREG0 ) && ( (r) <= DWMREG0_END ) )
+#define ADRINDEX0(r)	( ((r) - DWMREG0) >> 2 )
+#define ADRINDEX1(r)	( ( ((r) - DWMREG1) >> 2 ) | 0x0100 )
+#define ADRINDEX(r)	( ISREG0(r) ? ADRINDEX0(r) : ADRINDEX1(r) )
+
+/* Range of dword indices that belong to bank 0. */
+#define MMREG0		0x0000
+#define MMREG0_END	0x00ff
+
+/* MMSELECT(r) is the inverse of ADRINDEX(): it turns a dword index back
+ * into a byte offset.  Indices 0x00-0xff map into bank 0; any other
+ * index is masked to its low 8 bits and mapped into bank 1.
+ */
+#define ISMMREG0(r)	( ( (r) >= MMREG0 ) && ( (r) <= MMREG0_END ) )
+#define MMSELECT0(r)	( ((r)<<2) + DWMREG0 )
+#define MMSELECT1(r)	( ( (((r) & 0xff)<<2) + DWMREG1 ) )
+#define MMSELECT(r)	( ISMMREG0(r) ? MMSELECT0(r) : MMSELECT1(r) )
+
+/* Direct MMIO register access.  These macros expand to an expression
+ * that uses a variable named `mmesa', which must be in scope at the
+ * point of use.
+ *
+ * MACH64_BASE   - base of the mapped register region as an integer.
+ *                 unsigned long is used instead of CARD32 so the pointer
+ *                 is not truncated where pointers are wider than 32 bits.
+ * MACH64_ADDR   - integer address of register byte offset `reg'.
+ * MACH64_DEREF  - volatile 32-bit lvalue for the register.
+ * MACH64_READ   - read the register.
+ *
+ * The argument and the whole DEREF expression are parenthesised so the
+ * macros stay correct when given or embedded in larger expressions.
+ *
+ * FIXME: If register reads are necessary, we should account for
+ * endianness here.
+ */
+#define MACH64_BASE(reg)	((unsigned long)(mmesa->mach64Screen->mmio.map))
+#define MACH64_ADDR(reg)	(MACH64_BASE(reg) + (reg))
+
+#define MACH64_DEREF(reg)	(*(__volatile__ CARD32 *)MACH64_ADDR(reg))
+#define MACH64_READ(reg)	MACH64_DEREF(reg)
+
+
+/* ================================================================
+ * Registers
+ */
+
+#define MACH64_ALPHA_TST_CNTL			0x0550
+#	define MACH64_ALPHA_TEST_EN			(1 << 0)
+#	define MACH64_ALPHA_TEST_MASK			(7 << 4)
+#	define MACH64_ALPHA_TEST_NEVER			(0 << 4)
+#	define MACH64_ALPHA_TEST_LESS			(1 << 4)
+#	define MACH64_ALPHA_TEST_LEQUAL			(2 << 4)
+#	define MACH64_ALPHA_TEST_EQUAL			(3 << 4)
+#	define MACH64_ALPHA_TEST_GEQUAL			(4 << 4)
+#	define MACH64_ALPHA_TEST_GREATER		(5 << 4)
+#	define MACH64_ALPHA_TEST_NOTEQUAL		(6 << 4)
+#	define MACH64_ALPHA_TEST_ALWAYS			(7 << 4)
+#	define MACH64_ALPHA_MOD_MSB			(1 << 7)
+#	define MACH64_ALPHA_DST_MASK			(7 << 8)
+#	define MACH64_ALPHA_DST_ZERO			(0 << 8)
+#	define MACH64_ALPHA_DST_ONE			(1 << 8)
+#	define MACH64_ALPHA_DST_SRCALPHA		(4 << 8)
+#	define MACH64_ALPHA_DST_INVSRCALPHA		(5 << 8)
+#	define MACH64_ALPHA_DST_DSTALPHA		(6 << 8)
+#	define MACH64_ALPHA_DST_INVDSTALPHA		(7 << 8)
+#	define MACH64_ALPHA_TST_SRC_TEXEL		(0 << 12)
+#	define MACH64_ALPHA_TST_SRC_SRCALPHA		(1 << 12)
+#	define MACH64_REF_ALPHA_MASK			(0xff << 16)
+#	define MACH64_REF_ALPHA_SHIFT			16
+#	define MACH64_COMPOSITE_SHADOW			(1 << 30)
+#	define MACH64_SPECULAR_LIGHT_EN			(1 << 31)
+
+#define MACH64_BUS_CNTL				0x04a0
+#	define MACH64_BUS_MSTR_RESET			(1 << 1)
+#	define MACH64_BUS_FLUSH_BUF			(1 << 2)
+#	define MACH64_BUS_MASTER_DIS			(1 << 6)
+#	define MACH64_BUS_EXT_REG_EN			(1 << 27)
+
+#define MACH64_COMPOSITE_SHADOW_ID		0x0798
+
+#define MACH64_CLR_CMP_CLR			0x0700
+#define MACH64_CLR_CMP_CNTL			0x0708
+#define MACH64_CLR_CMP_MASK			0x0704
+
+/* Draw-path colour registers.
+ * NOTE(review): MACH64_DP_FOG_CLR and MACH64_DP_FRGD_CLR are both defined
+ * as 0x06c4 -- presumably one hardware register with two names; confirm
+ * against the register reference.
+ * NOTE(review): the "FGRD"/"FRGD" spelling is inconsistent across these
+ * names; they are left as-is because other files may use them.
+ */
+#define MACH64_DP_BKGD_CLR			0x06c0
+#define MACH64_DP_FOG_CLR			0x06c4
+#define MACH64_DP_FGRD_BKGD_CLR			0x06e0
+#define MACH64_DP_FRGD_CLR			0x06c4
+#define MACH64_DP_FGRD_CLR_MIX			0x06dc
+
+#define MACH64_DP_MIX				0x06d4
+#	define BKGD_MIX_NOT_D				(0 << 0)
+#	define BKGD_MIX_ZERO				(1 << 0)
+#	define BKGD_MIX_ONE				(2 << 0)
+#	define MACH64_BKGD_MIX_D			(3 << 0)
+#	define BKGD_MIX_NOT_S				(4 << 0)
+#	define BKGD_MIX_D_XOR_S				(5 << 0)
+#	define BKGD_MIX_NOT_D_XOR_S			(6 << 0)
+#	define MACH64_BKGD_MIX_S			(7 << 0)
+#	define BKGD_MIX_NOT_D_OR_NOT_S			(8 << 0)
+#	define BKGD_MIX_D_OR_NOT_S			(9 << 0)
+#	define BKGD_MIX_NOT_D_OR_S			(10 << 0)
+#	define BKGD_MIX_D_OR_S				(11 << 0)
+#	define BKGD_MIX_D_AND_S				(12 << 0)
+#	define BKGD_MIX_NOT_D_AND_S			(13 << 0)
+#	define BKGD_MIX_D_AND_NOT_S			(14 << 0)
+#	define BKGD_MIX_NOT_D_AND_NOT_S			(15 << 0)
+#	define BKGD_MIX_D_PLUS_S_DIV2			(23 << 0)
+#	define FRGD_MIX_NOT_D				(0 << 16)
+#	define FRGD_MIX_ZERO				(1 << 16)
+#	define FRGD_MIX_ONE				(2 << 16)
+#	define FRGD_MIX_D				(3 << 16)
+#	define FRGD_MIX_NOT_S				(4 << 16)
+#	define FRGD_MIX_D_XOR_S				(5 << 16)
+#	define FRGD_MIX_NOT_D_XOR_S			(6 << 16)
+#	define MACH64_FRGD_MIX_S			(7 << 16)
+#	define FRGD_MIX_NOT_D_OR_NOT_S			(8 << 16)
+#	define FRGD_MIX_D_OR_NOT_S			(9 << 16)
+#	define FRGD_MIX_NOT_D_OR_S			(10 << 16)
+#	define FRGD_MIX_D_OR_S				(11 << 16)
+#	define FRGD_MIX_D_AND_S				(12 << 16)
+#	define FRGD_MIX_NOT_D_AND_S			(13 << 16)
+#	define FRGD_MIX_D_AND_NOT_S			(14 << 16)
+#	define FRGD_MIX_NOT_D_AND_NOT_S			(15 << 16)
+#	define FRGD_MIX_D_PLUS_S_DIV2			(23 << 16)
+
+#define MACH64_DP_PIX_WIDTH			0x06d0
+#	define MACH64_COMPOSITE_PIX_WIDTH_MASK		(0xf << 4)
+#	define MACH64_HOST_TRIPLE_ENABLE		(1 << 13)
+#	define MACH64_BYTE_ORDER_MSB_TO_LSB		(0 << 24)
+#	define MACH64_BYTE_ORDER_LSB_TO_MSB		(1 << 24)
+#	define MACH64_SCALE_PIX_WIDTH_MASK		(0xf << 28)
+
+#define MACH64_DP_SRC				0x06d8
+#	define MACH64_BKGD_SRC_BKGD_CLR			(0 << 0)
+#	define MACH64_BKGD_SRC_FRGD_CLR			(1 << 0)
+#	define MACH64_BKGD_SRC_HOST			(2 << 0)
+#	define MACH64_BKGD_SRC_BLIT			(3 << 0)
+#	define MACH64_BKGD_SRC_PATTERN			(4 << 0)
+#	define MACH64_BKGD_SRC_3D			(5 << 0)
+#	define MACH64_FRGD_SRC_BKGD_CLR			(0 << 8)
+#	define MACH64_FRGD_SRC_FRGD_CLR			(1 << 8)
+#	define MACH64_FRGD_SRC_HOST			(2 << 8)
+#	define MACH64_FRGD_SRC_BLIT			(3 << 8)
+#	define MACH64_FRGD_SRC_PATTERN			(4 << 8)
+#	define MACH64_FRGD_SRC_3D			(5 << 8)
+#	define MACH64_MONO_SRC_ONE			(0 << 16)
+#	define MACH64_MONO_SRC_PATTERN			(1 << 16)
+#	define MACH64_MONO_SRC_HOST			(2 << 16)
+#	define MACH64_MONO_SRC_BLIT			(3 << 16)
+
+#define MACH64_DP_WRITE_MASK			0x06c8
+
+#define MACH64_DST_CNTL				0x0530
+#	define MACH64_DST_X_RIGHT_TO_LEFT		(0 << 0)
+#	define MACH64_DST_X_LEFT_TO_RIGHT		(1 << 0)
+#	define MACH64_DST_Y_BOTTOM_TO_TOP		(0 << 1)
+#	define MACH64_DST_Y_TOP_TO_BOTTOM		(1 << 1)
+#	define MACH64_DST_X_MAJOR			(0 << 2)
+#	define MACH64_DST_Y_MAJOR			(1 << 2)
+#	define MACH64_DST_X_TILE			(1 << 3)
+#	define MACH64_DST_Y_TILE			(1 << 4)
+#	define MACH64_DST_LAST_PEL			(1 << 5)
+#	define MACH64_DST_POLYGON_ENABLE		(1 << 6)
+#	define MACH64_DST_24_ROTATION_ENABLE		(1 << 7)
+
+#define MACH64_DST_HEIGHT_WIDTH			0x0518
+#define MACH64_DST_OFF_PITCH			0x0500
+#define MACH64_DST_WIDTH_HEIGHT			0x06ec
+#define MACH64_DST_X_Y				0x06e8
+#define MACH64_DST_Y_X				0x050c
+
+#define MACH64_FIFO_STAT			0x0710
+#	define MACH64_FIFO_SLOT_MASK			0x0000ffff
+#	define MACH64_FIFO_ERR				(1 << 31)
+
+#define MACH64_GEN_TEST_CNTL			0x04d0
+#define MACH64_GUI_CMDFIFO_DEBUG		0x0170
+#define MACH64_GUI_CMDFIFO_DATA			0x0174
+#define MACH64_GUI_CNTL				0x0178
+#define MACH64_GUI_STAT				0x0738
+#	define MACH64_GUI_ACTIVE			(1 << 0)
+#define MACH64_GUI_TRAJ_CNTL			0x0730
+
+#define MACH64_HOST_CNTL			0x0640
+#define MACH64_HOST_DATA0			0x0600
+#define MACH64_HW_DEBUG				0x047c
+
+#define MACH64_ONE_OVER_AREA			0x029c
+#define MACH64_ONE_OVER_AREA_UC			0x0300
+
+#define MACH64_PAT_REG0				0x0680
+#define MACH64_PAT_REG1				0x0684
+
+#define MACH64_SC_LEFT_RIGHT			0x06a8
+#define MACH64_SC_TOP_BOTTOM			0x06b4
+/* SCALE_3D_CNTL register offset followed by its bit-field values.
+ * Fields with a zero value are spelled (0 << n) to document the bit
+ * position they occupy.
+ */
+#define MACH64_SCALE_3D_CNTL			0x05fc
+#	define MACH64_SCALE_PIX_EXPAND_ZERO_EXTEND	(0 << 0)
+#	define MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE	(1 << 0)
+#	define MACH64_SCALE_DITHER_ERROR_DIFFUSE	(0 << 1)
+#	define MACH64_SCALE_DITHER_2D_TABLE		(1 << 1)
+#	define MACH64_DITHER_EN				(1 << 2)
+	/* Was (O << 3) with a capital letter O, i.e. an undeclared identifier. */
+#	define MACH64_DITHER_INIT_CURRENT		(0 << 3)
+#	define MACH64_DITHER_INIT_RESET			(1 << 3)
+#	define MACH64_ROUND_EN				(1 << 4)
+#	define MACH64_TEX_CACHE_DIS			(1 << 5)
+#	define MACH64_SCALE_3D_FCN_MASK			(3 << 6)
+#	define MACH64_SCALE_3D_FCN_NOP			(0 << 6)
+#	define MACH64_SCALE_3D_FCN_SCALE		(1 << 6)
+#	define MACH64_SCALE_3D_FCN_TEXTURE		(2 << 6)
+#	define MACH64_SCALE_3D_FCN_SHADE		(3 << 6)
+#	define MACH64_TEXTURE_DISABLE			(1 << 6)
+#	define MACH64_EDGE_ANTI_ALIAS			(1 << 8)
+#	define MACH64_TEX_CACHE_SPLIT			(1 << 9)
+#	define MACH64_APPLE_YUV_MODE			(1 << 10)
+#	define MACH64_ALPHA_FOG_EN_MASK			(3 << 11)
+#	define MACH64_ALPHA_FOG_DIS			(0 << 11)
+#	define MACH64_ALPHA_FOG_EN_ALPHA		(1 << 11)
+#	define MACH64_ALPHA_FOG_EN_FOG			(2 << 11)
+#	define MACH64_ALPHA_BLEND_SAT			(1 << 13)
+#	define MACH64_RED_DITHER_MAX			(1 << 14)
+#	define MACH64_SIGNED_DST_CLAMP			(1 << 15)
+#	define MACH64_ALPHA_BLEND_SRC_MASK		(7 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ZERO		(0 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ONE		(1 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTCOLOR		(2 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR	(3 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_SRCALPHA		(4 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVSRCALPHA	(5 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTALPHA		(6 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTALPHA	(7 << 16)
+#	define MACH64_ALPHA_BLEND_DST_MASK		(7 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ZERO		(0 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ONE		(1 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCCOLOR		(2 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCCOLOR	(3 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCALPHA		(4 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCALPHA	(5 << 19)
+#	define MACH64_ALPHA_BLEND_DST_DSTALPHA		(6 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVDSTALPHA	(7 << 19)
+#	define MACH64_TEX_LIGHT_FCN_MASK		(3 << 22)
+#	define MACH64_TEX_LIGHT_FCN_REPLACE		(0 << 22)
+#	define MACH64_TEX_LIGHT_FCN_MODULATE		(1 << 22)
+#	define MACH64_TEX_LIGHT_FCN_ALPHA_DECAL		(2 << 22)
+#	define MACH64_MIP_MAP_DISABLE			(1 << 24)
+#	define MACH64_BILINEAR_TEX_EN			(1 << 25)
+#	define MACH64_TEX_BLEND_FCN_MASK		(3 << 26)
+#	define MACH64_TEX_BLEND_FCN_NEAREST		(0 << 26)
+#	define MACH64_TEX_BLEND_FCN_LINEAR		(2 << 26)
+#	define MACH64_TEX_BLEND_FCN_TRILINEAR		(3 << 26)
+#	define MACH64_TEX_AMASK_AEN			(1 << 28)
+#	define MACH64_TEX_AMASK_BLEND_EDGE		(1 << 29)
+#	define MACH64_TEX_MAP_AEN			(1 << 30)
+#	define MACH64_SRC_3D_HOST_FIFO			(1 << 31)
+#define MACH64_SCRATCH_REG0			0x0480
+#define MACH64_SCRATCH_REG1			0x0484
+#define MACH64_SECONDARY_TEX_OFF		0x0778
+#define MACH64_SETUP_CNTL			0x0304
+#	define MACH64_DONT_START_TRI			(1 << 0)
+#	define MACH64_DONT_START_ANY			(1 << 2)
+#	define MACH64_FLAT_SHADE_MASK			(3 << 3)
+#	define MACH64_FLAT_SHADE_OFF			(0 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_1		(1 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_2		(2 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_3		(3 << 3)
+#	define MACH64_SOLID_MODE_OFF			(0 << 5)
+#	define MACH64_SOLID_MODE_ON			(1 << 5)
+#	define MACH64_LOG_MAX_INC_ADJ			(1 << 6)
+#	define MACH64_SET_UP_CONTINUE			(1 << 31)
+#define MACH64_SRC_CNTL				0x05b4
+#define MACH64_SRC_HEIGHT1			0x0594
+#define MACH64_SRC_HEIGHT2			0x05ac
+#define MACH64_SRC_HEIGHT1_WIDTH1		0x0598
+#define MACH64_SRC_HEIGHT2_WIDTH2		0x05b0
+#define MACH64_SRC_OFF_PITCH			0x0580
+#define MACH64_SRC_WIDTH1			0x0590
+#define MACH64_SRC_Y_X				0x058c
+
+#define MACH64_TEX_0_OFF			0x05c0
+#define MACH64_TEX_CNTL				0x0774
+#	define MACH64_LOD_BIAS_SHIFT			0
+#	define MACH64_LOD_BIAS_MASK			(0xf << 0)
+#	define MACH64_COMP_FACTOR_SHIFT			4
+#	define MACH64_COMP_FACTOR_MASK			(0xf << 4)
+#	define MACH64_TEXTURE_COMPOSITE			(1 << 8)
+#	define MACH64_COMP_COMBINE_BLEND		(0 << 9)
+#	define MACH64_COMP_COMBINE_MODULATE		(1 << 9)
+#	define MACH64_COMP_BLEND_NEAREST		(0 << 11)
+#	define MACH64_COMP_BLEND_BILINEAR		(1 << 11)
+#	define MACH64_COMP_FILTER_NEAREST		(0 << 12)
+#	define MACH64_COMP_FILTER_BILINEAR		(1 << 12)
+#	define MACH64_COMP_ALPHA			(1 << 13)
+#	define MACH64_TEXTURE_TILING			(1 << 14)
+#	define MACH64_COMPOSITE_TEX_TILING		(1 << 15)
+#	define MACH64_TEX_COLLISION_DISABLE		(1 << 16)
+#	define MACH64_TEXTURE_CLAMP_S			(1 << 17)
+#	define MACH64_TEXTURE_CLAMP_T			(1 << 18)
+#	define MACH64_TEX_ST_MULT_W			(0 << 19)
+#	define MACH64_TEX_ST_DIRECT			(1 << 19)
+#	define MACH64_TEX_SRC_LOCAL			(0 << 20)
+#	define MACH64_TEX_SRC_AGP			(1 << 20)
+#	define MACH64_TEX_UNCOMPRESSED			(0 << 21)
+#	define MACH64_TEX_VQ_COMPRESSED			(1 << 21)
+#	define MACH64_COMP_TEX_UNCOMPRESSED		(0 << 22)
+#	define MACH64_COMP_TEX_VQ_COMPRESSED		(1 << 22)
+#	define MACH64_TEX_CACHE_FLUSH			(1 << 23)
+#	define MACH64_SEC_TEX_CLAMP_S			(1 << 24)
+#	define MACH64_SEC_TEX_CLAMP_T			(1 << 25)
+#	define MACH64_TEX_WRAP_S			(1 << 28)
+#	define MACH64_TEX_WRAP_T			(1 << 29)
+	/* NOTE(review): the 4K and 2K cache-size names below expand to the
+	 * same value, so they cannot select different sizes.  One of them is
+	 * presumably meant to be (0 << 30) -- confirm against the hardware
+	 * register reference before relying on either.
+	 */
+#	define MACH64_TEX_CACHE_SIZE_4K			(1 << 30)
+#	define MACH64_TEX_CACHE_SIZE_2K			(1 << 30)
+#	define MACH64_SECONDARY_STW			(1 << 31)
+#define MACH64_TEX_PALETTE			0x077c
+#define MACH64_TEX_PALETTE_INDEX		0x0740
+#define MACH64_TEX_SIZE_PITCH			0x0770
+
+#define MACH64_VERTEX_1_ARGB			0x0254
+#define MACH64_VERTEX_1_S			0x0240
+#define MACH64_VERTEX_1_SECONDARY_S		0x0328
+#define MACH64_VERTEX_1_SECONDARY_T		0x032c
+#define MACH64_VERTEX_1_SECONDARY_W		0x0330
+#define MACH64_VERTEX_1_SPEC_ARGB		0x024c
+#define MACH64_VERTEX_1_T			0x0244
+#define MACH64_VERTEX_1_W			0x0248
+#define MACH64_VERTEX_1_X_Y			0x0258
+#define MACH64_VERTEX_1_Z			0x0250
+#define MACH64_VERTEX_2_ARGB			0x0274
+#define MACH64_VERTEX_2_S			0x0260
+#define MACH64_VERTEX_2_SECONDARY_S		0x0334
+#define MACH64_VERTEX_2_SECONDARY_T		0x0338
+#define MACH64_VERTEX_2_SECONDARY_W		0x033c
+#define MACH64_VERTEX_2_SPEC_ARGB		0x026c
+#define MACH64_VERTEX_2_T			0x0264
+#define MACH64_VERTEX_2_W			0x0268
+#define MACH64_VERTEX_2_X_Y			0x0278
+#define MACH64_VERTEX_2_Z			0x0270
+#define MACH64_VERTEX_3_ARGB			0x0294
+#define MACH64_VERTEX_3_S			0x0280
+#define MACH64_VERTEX_3_SECONDARY_S		0x02a0
+#define MACH64_VERTEX_3_SECONDARY_T		0x02a4
+#define MACH64_VERTEX_3_SECONDARY_W		0x02a8
+#define MACH64_VERTEX_3_SPEC_ARGB		0x028c
+#define MACH64_VERTEX_3_T			0x0284
+#define MACH64_VERTEX_3_W			0x0288
+#define MACH64_VERTEX_3_X_Y			0x0298
+#define MACH64_VERTEX_3_Z			0x0290
+
+#define MACH64_Z_CNTL				0x054c
+#	define MACH64_Z_EN				(1 << 0)
+#	define MACH64_Z_SRC_2D				(1 << 1)
+#	define MACH64_Z_TEST_MASK			(7 << 4)
+#	define MACH64_Z_TEST_NEVER			(0 << 4)
+#	define MACH64_Z_TEST_LESS			(1 << 4)
+#	define MACH64_Z_TEST_LEQUAL			(2 << 4)
+#	define MACH64_Z_TEST_EQUAL			(3 << 4)
+#	define MACH64_Z_TEST_GEQUAL			(4 << 4)
+#	define MACH64_Z_TEST_GREATER			(5 << 4)
+#	define MACH64_Z_TEST_NOTEQUAL			(6 << 4)
+#	define MACH64_Z_TEST_ALWAYS			(7 << 4)
+#	define MACH64_Z_MASK_EN				(1 << 8)
+#define MACH64_Z_OFF_PITCH			0x0548
+
+
+
+#define MACH64_DATATYPE_CI8				2
+#define MACH64_DATATYPE_ARGB1555			3
+#define MACH64_DATATYPE_RGB565				4
+#define MACH64_DATATYPE_ARGB8888			6
+#define MACH64_DATATYPE_RGB332				7
+#define MACH64_DATATYPE_Y8				8
+#define MACH64_DATATYPE_RGB8				9
+#define MACH64_DATATYPE_VYUY422				11
+#define MACH64_DATATYPE_YVYU422				12
+#define MACH64_DATATYPE_AYUV444				14
+#define MACH64_DATATYPE_ARGB4444			15
+
+/* Driver-level aliases for the two pattern registers (MACH64_PAT_REG0 and
+ * MACH64_PAT_REG1).  Presumably they are reused as scratch storage for
+ * frame / dispatch bookkeeping -- confirm against the users of these names.
+ */
+#define MACH64_LAST_FRAME_REG			MACH64_PAT_REG0
+#define MACH64_LAST_DISPATCH_REG		MACH64_PAT_REG1
+
+#endif
+#endif /* __MACH64_REG_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.c b/src/mesa/drivers/dri/mach64/mach64_screen.c
new file mode 100644
index 0000000000..69bd228285
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_screen.c
@@ -0,0 +1,318 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+
+#include "context.h"
+#include "imports.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "glxextensions.h"
+
+/* Create the device specific screen private data struct.
+ *
+ * Steps, in order: check the DRI/DDX/DRM versions, allocate the record,
+ * ask the DRM for the IRQ number, map the register region, map the DRM
+ * buffers, map the AGP texture region (non-PCI cards only), then copy
+ * the framebuffer layout and texture heap description out of the
+ * server's ATIDRI record.
+ *
+ * Returns the new record, or NULL on failure.  On failure everything
+ * acquired so far is released again, in reverse order of acquisition.
+ */
+static mach64ScreenRec *
+mach64CreateScreen( __DRIscreenPrivate *driScreen )
+{
+   ATIDRIPtr serverInfo = (ATIDRIPtr)driScreen->pDevPriv;
+   mach64ScreenRec *screen;
+   drmMach64GetParam gp;
+   int ret;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI )
+      fprintf( stderr, "%s\n", __FUNCTION__ );
+
+   if ( ! driCheckDriDdxDrmVersions( driScreen, "Mach64", 4, 0, 6, 4, 1, 0 ) )
+      return NULL;
+
+   /* Allocate the private area */
+   screen = (mach64ScreenRec *) CALLOC( sizeof(*screen) );
+   if ( screen == NULL )
+      return NULL;
+
+   screen->IsPCI = serverInfo->IsPCI;
+
+   /* The DRM writes the IRQ number straight into the record. */
+   gp.param = MACH64_PARAM_IRQ_NR;
+   gp.value = &screen->irq;
+   ret = drmCommandWriteRead( driScreen->fd, DRM_MACH64_GETPARAM,
+                              &gp, sizeof(gp) );
+   if ( ret ) {
+      fprintf(stderr, "DRM_MACH64_GETPARAM (MACH64_PARAM_IRQ_NR): %d\n", ret);
+      goto fail_free;
+   }
+
+   /* Register region */
+   screen->mmio.handle = serverInfo->regs;
+   screen->mmio.size   = serverInfo->regsSize;
+   if ( drmMap( driScreen->fd, screen->mmio.handle, screen->mmio.size,
+                (drmAddressPtr)&screen->mmio.map ) != 0 )
+      goto fail_free;
+
+   /* DRM buffers */
+   screen->buffers = drmMapBufs( driScreen->fd );
+   if ( !screen->buffers )
+      goto fail_unmap_mmio;
+
+   /* AGP texture region, only when the card is not PCI */
+   if ( !screen->IsPCI ) {
+      screen->agpTextures.handle = serverInfo->agp;
+      screen->agpTextures.size   = serverInfo->agpSize;
+      if ( drmMap( driScreen->fd,
+                   screen->agpTextures.handle,
+                   screen->agpTextures.size,
+                   (drmAddressPtr)&screen->agpTextures.map ) )
+         goto fail_unmap_bufs;
+   }
+
+   /* Plain copies from the server's record */
+   screen->AGPMode     = serverInfo->AGPMode;
+
+   screen->chipset     = serverInfo->chipset;
+   screen->width       = serverInfo->width;
+   screen->height      = serverInfo->height;
+   screen->mem         = serverInfo->mem;
+   screen->cpp         = serverInfo->cpp;
+
+   screen->frontOffset = serverInfo->frontOffset;
+   screen->frontPitch  = serverInfo->frontPitch;
+   screen->backOffset  = serverInfo->backOffset;
+   screen->backPitch   = serverInfo->backPitch;
+   screen->depthOffset = serverInfo->depthOffset;
+   screen->depthPitch  = serverInfo->depthPitch;
+
+   /* Card-local texture heap */
+   screen->texOffset[MACH64_CARD_HEAP] = serverInfo->textureOffset;
+   screen->texSize[MACH64_CARD_HEAP] = serverInfo->textureSize;
+   screen->logTexGranularity[MACH64_CARD_HEAP] =
+      serverInfo->logTextureGranularity;
+
+   /* AGP texture heap, and which heap comes first */
+   if ( !screen->IsPCI ) {
+      /* With an empty card heap, the AGP heap is the only one in use. */
+      const int haveCardHeap = ( screen->texSize[MACH64_CARD_HEAP] > 0 );
+
+      screen->numTexHeaps = haveCardHeap ? MACH64_NR_TEX_HEAPS
+                                         : MACH64_NR_TEX_HEAPS - 1;
+      screen->firstTexHeap = haveCardHeap ? MACH64_CARD_HEAP
+                                          : MACH64_AGP_HEAP;
+
+      screen->texOffset[MACH64_AGP_HEAP] = serverInfo->agpTextureOffset;
+      screen->texSize[MACH64_AGP_HEAP] = serverInfo->agpSize;
+      screen->logTexGranularity[MACH64_AGP_HEAP] =
+         serverInfo->logAgpTextureGranularity;
+   } else {
+      /* PCI: no AGP heap at all */
+      screen->numTexHeaps = MACH64_NR_TEX_HEAPS - 1;
+      screen->firstTexHeap = MACH64_CARD_HEAP;
+      screen->texOffset[MACH64_AGP_HEAP] = 0;
+      screen->texSize[MACH64_AGP_HEAP] = 0;
+      screen->logTexGranularity[MACH64_AGP_HEAP] = 0;
+   }
+
+   screen->driScreen = driScreen;
+
+   return screen;
+
+   /* Error unwinding: each label releases one resource and falls through
+    * to the labels for everything acquired before it.
+    */
+fail_unmap_bufs:
+   drmUnmapBufs( screen->buffers );
+fail_unmap_mmio:
+   drmUnmap( (drmAddress)screen->mmio.map, screen->mmio.size );
+fail_free:
+   FREE( screen );
+   return NULL;
+}
+
+/* Destroy the device specific screen private data struct.
+ *
+ * Does nothing when driScreen->private is NULL.  Otherwise unmaps the
+ * AGP texture region (non-PCI cards only), the DRM buffers and the
+ * register region, frees the record and clears driScreen->private.
+ */
+static void
+mach64DestroyScreen( __DRIscreenPrivate *driScreen )
+{
+   mach64ScreenRec *screen = (mach64ScreenRec *) driScreen->private;
+
+   if ( screen != NULL ) {
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI )
+         fprintf( stderr, "%s\n", __FUNCTION__ );
+
+      /* The AGP region is only ever mapped for non-PCI cards. */
+      if ( !screen->IsPCI )
+         drmUnmap( (drmAddress)screen->agpTextures.map,
+                   screen->agpTextures.size );
+
+      drmUnmapBufs( screen->buffers );
+      drmUnmap( (drmAddress)screen->mmio.map, screen->mmio.size );
+
+      FREE( screen );
+      driScreen->private = NULL;
+   }
+}
+
+/* Initialize the fullscreen mode.
+ * No work is done here; the hook always reports success.
+ */
+static GLboolean
+mach64OpenFullScreen( __DRIcontextPrivate *driContextPriv )
+{
+   (void) driContextPriv;	/* unused */
+   return GL_TRUE;
+}
+
+/* Shut down the fullscreen mode.
+ * No work is done here; the hook always reports success.
+ */
+static GLboolean
+mach64CloseFullScreen( __DRIcontextPrivate *driContextPriv )
+{
+   (void) driContextPriv;	/* unused */
+   return GL_TRUE;
+}
+
+
+/* Create and initialize the Mesa and driver specific buffer data for a
+ * drawable.
+ *
+ * Pixmaps are not implemented: GL_FALSE is returned and the drawable is
+ * left untouched.  Otherwise a Mesa framebuffer is created for the
+ * visual and stored in driDrawPriv->driverPrivate; the result is
+ * GL_TRUE when that creation succeeded.
+ */
+static GLboolean
+mach64CreateBuffer( __DRIscreenPrivate *driScrnPriv,
+		    __DRIdrawablePrivate *driDrawPriv,
+		    const __GLcontextModes *mesaVis,
+		    GLboolean isPixmap )
+{
+   if ( isPixmap )
+      return GL_FALSE; /* not implemented */
+
+   driDrawPriv->driverPrivate = (void *)
+      _mesa_create_framebuffer( mesaVis,
+                                GL_FALSE,  /* software depth buffer? */
+                                mesaVis->stencilBits > 0,
+                                mesaVis->accumRedBits > 0,
+                                mesaVis->alphaBits > 0 );
+
+   return (driDrawPriv->driverPrivate != NULL);
+}
+
+
+/* Destroy the Mesa framebuffer stored in the drawable's driverPrivate. */
+static void
+mach64DestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{
+   GLframebuffer *fb = (GLframebuffer *) (driDrawPriv->driverPrivate);
+
+   _mesa_destroy_framebuffer(fb);
+}
+
+
+/* Copy the back color buffer to the front color buffer.
+ *
+ * A drawable with no bound context (or a context with no driver data)
+ * is reported through _mesa_problem().  For single-buffered visuals
+ * nothing is done.
+ */
+static void
+mach64SwapBuffers(__DRIdrawablePrivate *dPriv)
+{
+   mach64ContextPtr mmesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = mmesa->glCtx;
+   if (!ctx->Visual.doubleBufferMode)
+      return;
+
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+   mach64CopyBuffer( dPriv );
+}
+
+
+/* Initialize the driver specific screen private data.
+ *
+ * Stores the result of mach64CreateScreen() in driScreen->private and
+ * returns GL_TRUE when a record was created.  On failure
+ * mach64DestroyScreen() is still called before returning GL_FALSE.
+ */
+static GLboolean
+mach64InitDriver( __DRIscreenPrivate *driScreen )
+{
+   driScreen->private = (void *) mach64CreateScreen( driScreen );
+
+   if ( driScreen->private )
+      return GL_TRUE;
+
+   mach64DestroyScreen( driScreen );
+   return GL_FALSE;
+}
+
+
+/* This function is called by libGL.so as soon as libGL.so is loaded.
+ * This is where we register new extension functions with the dispatcher.
+ *
+ * The GLX extensions listed below are enabled through libGL's
+ * __glXEnableExtension entry point, provided the GLX API version is at
+ * least 20030317 and that entry point can be looked up.
+ */
+void __driRegisterExtensions( void )
+{
+   static const char *const glx_extensions[] = {
+      "GLX_SGI_swap_control",
+      "GLX_SGI_video_sync",
+      "GLX_MESA_swap_control"
+   };
+   PFNGLXENABLEEXTENSIONPROC glx_enable_extension;
+   unsigned int i;
+
+   if ( driCompareGLXAPIVersion( 20030317 ) < 0 )
+      return;
+
+   glx_enable_extension = (PFNGLXENABLEEXTENSIONPROC)
+      glXGetProcAddress( "__glXEnableExtension" );
+   if ( glx_enable_extension == NULL )
+      return;
+
+   for ( i = 0 ; i < sizeof(glx_extensions) / sizeof(glx_extensions[0]) ; i++ )
+      glx_enable_extension( glx_extensions[i], GL_FALSE );
+}
+
+
+/* Driver entry points handed to __driUtilCreateScreen().  Hooks this
+ * driver does not provide are set to NULL explicitly.
+ */
+static struct __DriverAPIRec mach64API = {
+   /* screen */
+   .InitDriver      = mach64InitDriver,
+   .DestroyScreen   = mach64DestroyScreen,
+
+   /* context */
+   .CreateContext   = mach64CreateContext,
+   .DestroyContext  = mach64DestroyContext,
+   .MakeCurrent     = mach64MakeCurrent,
+   .UnbindContext   = mach64UnbindContext,
+
+   /* drawable buffers */
+   .CreateBuffer    = mach64CreateBuffer,
+   .DestroyBuffer   = mach64DestroyBuffer,
+   .SwapBuffers     = mach64SwapBuffers,
+
+   /* fullscreen */
+   .OpenFullScreen  = mach64OpenFullScreen,
+   .CloseFullScreen = mach64CloseFullScreen,
+
+   /* swap / MSC queries */
+   .GetSwapInfo     = NULL,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+
+/*
+ * This is the bootstrap function for the driver.
+ * The __driCreateScreen name is the symbol that libGL.so fetches.
+ * All arguments are forwarded to __driUtilCreateScreen() together with
+ * this driver's API table.
+ * Return:  pointer to a __DRIscreenPrivate.
+ */
+void *__driCreateScreen(Display *dpy, int scrn, __DRIscreen *psc,
+                        int numConfigs, __GLXvisualConfig *config)
+{
+   __DRIscreenPrivate *screenPriv =
+      __driUtilCreateScreen(dpy, scrn, psc, numConfigs, config, &mach64API);
+
+   return (void *) screenPriv;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.h b/src/mesa/drivers/dri/mach64/mach64_screen.h
new file mode 100644
index 0000000000..3c4794a7c1
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_screen.h
@@ -0,0 +1,82 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_SCREEN_H__
+#define __MACH64_SCREEN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "mach64_sarea.h"
+#include "xmlconfig.h"
+
+/* Description of one DRM-mapped region: the handle and size passed to
+ * drmMap(), and the address the mapping was returned at.
+ *
+ * NOTE(review): `map' is declared as `drmAddress *'.  Callers cast it
+ * both when mapping ((drmAddressPtr)&region.map) and when unmapping
+ * ((drmAddress)region.map), so it is presumably meant to hold a plain
+ * drmAddress -- confirm before changing the type.
+ */
+typedef struct {
+   drmHandle handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress *map;			/* Mapping of the DRM region */
+} mach64RegionRec, *mach64RegionPtr;
+
+/* Per-screen driver record.  mach64CreateScreen() in mach64_screen.c
+ * allocates it zeroed and fills it from the server's ATIDRI record and
+ * from the DRM; mach64DestroyScreen() releases it.
+ */
+typedef struct {
+   /* Copied verbatim from the server's record. */
+   int chipset;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+
+   /* Front/back color buffer offset and pitch, copied from the server. */
+   unsigned int	frontOffset;
+   unsigned int frontPitch;
+   unsigned int	backOffset;
+   unsigned int backPitch;
+
+   /* Depth buffer offset and pitch, copied from the server. */
+   unsigned int	depthOffset;
+   unsigned int depthPitch;
+
+   /* When IsPCI is non-zero no AGP texture region is mapped and the AGP
+    * texture heap is left empty.
+    */
+   int IsPCI;
+   int AGPMode;
+   /* Filled in by the DRM via DRM_MACH64_GETPARAM / MACH64_PARAM_IRQ_NR. */
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   /* Shared Texture data */
+   /* firstTexHeap is the first heap in use, numTexHeaps how many are in
+    * use; the arrays are indexed by heap (MACH64_CARD_HEAP,
+    * MACH64_AGP_HEAP).
+    */
+   int firstTexHeap, numTexHeaps;
+   int texOffset[MACH64_NR_TEX_HEAPS];
+   int texSize[MACH64_NR_TEX_HEAPS];
+   int logTexGranularity[MACH64_NR_TEX_HEAPS];
+
+   /* DRM mappings: the register region, and the AGP texture region
+    * (only mapped when IsPCI is zero).
+    */
+   mach64RegionRec mmio;
+   mach64RegionRec agpTextures;
+
+   /* Result of drmMapBufs(); released with drmUnmapBufs(). */
+   drmBufMapPtr buffers;
+
+   /* Back pointer to the DRI screen this record belongs to. */
+   __DRIscreenPrivate *driScreen;
+
+   /* NOTE(review): not initialized by mach64CreateScreen() beyond the
+    * zeroing allocation -- confirm where it is set up.
+    */
+   driOptionCache optionCache;
+} mach64ScreenRec, *mach64ScreenPtr;
+
+#endif
+#endif /* __MACH64_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_span.c b/src/mesa/drivers/dri/mach64/mach64_span.c
new file mode 100644
index 0000000000..4c668ba90e
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_span.c
@@ -0,0 +1,274 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_span.h"
+#include "mach64_tex.h"
+
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+#define LOCAL_VARS /* color-span locals; presumably consumed by spantmp.h (not shown) */ \
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   mach64ScreenRec *mach64Screen = mmesa->mach64Screen;			\
+   __DRIscreenPrivate *driScreen = mmesa->driScreen;			\
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;			\
+   GLuint pitch = mmesa->drawPitch * mach64Screen->cpp; /* row size in bytes */ \
+   GLuint height = dPriv->h;						\
+   char *buf = (char *)(driScreen->pFB +				\
+			mmesa->drawOffset +				\
+			(dPriv->x * mach64Screen->cpp) +		\
+			(dPriv->y * pitch));				\
+   char *read_buf = (char *)(driScreen->pFB +				\
+			     mmesa->readOffset +			\
+			     (dPriv->x * mach64Screen->cpp) +		\
+			     (dPriv->y * pitch)); /* NOTE: uses the draw pitch, not readPitch */ \
+   GLushort p;								\
+   (void) read_buf; (void) buf; (void) p
+
+#define LOCAL_DEPTH_VARS /* depth-span locals; expects `mmesa` to be in scope already */ \
+   mach64ScreenRec *mach64Screen = mmesa->mach64Screen;			\
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;			\
+   __DRIscreenPrivate *driScreen = mmesa->driScreen;			\
+   GLuint pitch = mach64Screen->depthPitch * 2; /* 2 bytes per depth value */ \
+   GLuint height = dPriv->h;						\
+   char *buf = (char *)(driScreen->pFB +				\
+			mach64Screen->depthOffset +			\
+			dPriv->x * 2 +					\
+			dPriv->y * pitch)
+/* Stencil span code, if ever instantiated, reuses the depth-buffer locals. */
+#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS
+
+#define CLIPPIXEL( _x, _y ) /* non-zero when (_x,_y) is inside [minx,maxx) x [miny,maxy) */ \
+   (((_x) >= minx) && ((_x) < maxx) && ((_y) >= miny) && ((_y) < maxy))
+
+
+#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i ) /* clip a span: _x1/_n1 get the clipped start/length, _i advances by the pixels cut on the left */ \
+   if (((_y) < miny) || ((_y) >= maxy)) {				\
+      _n1 = 0, _x1 = (_x);						\
+   } else {								\
+      _n1 = (_n);							\
+      _x1 = (_x);							\
+      if (_x1 < minx) _i += (minx-_x1), _n1 -= (minx-_x1), _x1 = minx;	\
+      if (_x1 + _n1 >= maxx) _n1 -= (_x1 + _n1 - maxx);			\
+   }
+
+#define Y_FLIP( _y )	(height - (_y) - 1)	/* mirror _y within 0..height-1; `height` must be in scope */
+
+
+#define HW_LOCK() /* declares mmesa, then LOCK_HARDWARE and FINISH_DMA_LOCKED on it */ \
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   LOCK_HARDWARE( mmesa );						\
+   FINISH_DMA_LOCKED( mmesa );
+
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = mmesa->numClipRects;					\
+      /* visit cliprects last-to-first, rebased by drawX/drawY */	\
+      while ( _nc-- ) {							\
+	 int minx = mmesa->pClipRects[_nc].x1 - mmesa->drawX;		\
+	 int miny = mmesa->pClipRects[_nc].y1 - mmesa->drawY;		\
+	 int maxx = mmesa->pClipRects[_nc].x2 - mmesa->drawX;		\
+	 int maxy = mmesa->pClipRects[_nc].y2 - mmesa->drawY;
+/* Closes the while and do scopes opened by HW_CLIPLOOP(); use only as a pair. */
+#define HW_ENDCLIPLOOP()						\
+      }									\
+   } while (0)
+
+#define HW_UNLOCK() /* UNLOCK_HARDWARE on the mmesa declared by HW_LOCK() */ \
+   UNLOCK_HARDWARE( mmesa )
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = MACH64PACKCOLOR565( color[0], color[1], color[2] )
+/* Store one pixel as RGB565 from the top 5/6/5 bits of r/g/b; a is unused. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = ((((int)(r) & 0xf8) << 8) | \
+					       (((int)(g) & 0xfc) << 3) | \
+					       (((int)(b) & 0xf8) >> 3))
+/* Store an already packed 16-bit pixel value. */
+#define WRITE_PIXEL( _x, _y, p )					\
+    *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)
+/* Read a pixel from read_buf, rescaling each field to 0..255; alpha reads as 0xff. */
+#define READ_RGBA( rgba, _x, _y )					\
+    do {								\
+	GLushort p = *(GLushort *)(read_buf + (_x)*2 + (_y)*pitch);	\
+	rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;			\
+	rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;			\
+	rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;			\
+	rgba[3] = 0xff;							\
+    } while (0)
+/* Generate the mach64*_RGB565 functions from the span template. */
+#define TAG(x) mach64##x##_RGB565
+#include "spantmp.h"
+
+
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = MACH64PACKCOLOR8888( color[0], color[1], color[2], color[3] )
+/* Store one ARGB8888 pixel; GLuint casts keep `a << 24` out of the int sign bit. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (((GLuint)(b) <<  0) |	\
+					     ((GLuint)(g) <<  8) |	\
+					     ((GLuint)(r) << 16) |	\
+					     ((GLuint)(a) << 24))
+/* Store an already packed 32-bit pixel value. */
+#define WRITE_PIXEL( _x, _y, p )					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (p)
+/* Read a pixel from read_buf; stored alpha is ignored and reported as 0xff. */
+#define READ_RGBA( rgba, _x, _y )					\
+do {									\
+   GLuint p = *(GLuint *)(read_buf + (_x)*4 + (_y)*pitch);		\
+   rgba[0] = (p >> 16) & 0xff;						\
+   rgba[1] = (p >>  8) & 0xff;						\
+   rgba[2] = (p >>  0) & 0xff;						\
+   rgba[3] = 0xff; /*(p >> 24) & 0xff;*/				\
+} while (0)
+/* Generate the mach64*_ARGB8888 functions from the span template. */
+#define TAG(x) mach64##x##_ARGB8888
+#include "spantmp.h"
+
+
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* 16 bit depthbuffer functions.
+ */
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (d);
+/* Trailing ';' kept on both macros: depthtmp.h call sites are not visible here. */
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);
+/* Generate the mach64*_16 depth functions from the depth template. */
+#define TAG(x) mach64##x##_16
+#include "depthtmp.h"
+
+
+/*
+ * This function is called to specify which buffer to read and write
+ * for software rasterization (swrast) fallbacks.  This doesn't necessarily
+ * correspond to glDrawBuffer() or glReadBuffer() calls.
+ */
+static void mach64DDSetBuffer( GLcontext *ctx,
+			       GLframebuffer *colorBuffer,
+			       GLuint bufferBit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* colorBuffer is unused; bufferBit alone selects the offset/pitch pair. */
+   switch ( bufferBit ) {
+   case FRONT_LEFT_BIT:
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: FRONT_LEFT_BIT\n", __FUNCTION__);
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->frontOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->frontPitch;
+      break;
+   case BACK_LEFT_BIT:
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BACK_LEFT_BIT\n", __FUNCTION__);
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->backOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->backPitch;
+      break;
+   default:	/* any other bit leaves the current draw/read buffer unchanged */
+      break;
+   }
+}
+
+
+void mach64DDInitSpanFuncs( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+   /* Fill the swrast device-driver table with this driver's span hooks. */
+   swdd->SetBuffer = mach64DDSetBuffer;
+
+   /* Color span functions depend on bytes per pixel; other values install none. */
+   switch ( mmesa->mach64Screen->cpp ) {
+   case 2:
+      swdd->WriteRGBASpan	= mach64WriteRGBASpan_RGB565;
+      swdd->WriteRGBSpan	= mach64WriteRGBSpan_RGB565;
+      swdd->WriteMonoRGBASpan	= mach64WriteMonoRGBASpan_RGB565;
+      swdd->WriteRGBAPixels	= mach64WriteRGBAPixels_RGB565;
+      swdd->WriteMonoRGBAPixels	= mach64WriteMonoRGBAPixels_RGB565;
+      swdd->ReadRGBASpan	= mach64ReadRGBASpan_RGB565;
+      swdd->ReadRGBAPixels	= mach64ReadRGBAPixels_RGB565;
+      break;
+
+   case 4:
+      swdd->WriteRGBASpan	= mach64WriteRGBASpan_ARGB8888;
+      swdd->WriteRGBSpan	= mach64WriteRGBSpan_ARGB8888;
+      swdd->WriteMonoRGBASpan	= mach64WriteMonoRGBASpan_ARGB8888;
+      swdd->WriteRGBAPixels	= mach64WriteRGBAPixels_ARGB8888;
+      swdd->WriteMonoRGBAPixels	= mach64WriteMonoRGBAPixels_ARGB8888;
+      swdd->ReadRGBASpan	= mach64ReadRGBASpan_ARGB8888;
+      swdd->ReadRGBAPixels	= mach64ReadRGBAPixels_ARGB8888;
+
+      break;
+
+   default:
+      break;
+   }
+
+   /* Depth buffer is always 16 bit */
+   swdd->ReadDepthSpan		= mach64ReadDepthSpan_16;
+   swdd->WriteDepthSpan		= mach64WriteDepthSpan_16;
+   swdd->ReadDepthPixels	= mach64ReadDepthPixels_16;
+   swdd->WriteDepthPixels	= mach64WriteDepthPixels_16;
+  
+   /* No hardware stencil buffer */
+   swdd->ReadStencilSpan	= NULL;
+   swdd->WriteStencilSpan	= NULL;
+   swdd->ReadStencilPixels	= NULL;
+   swdd->WriteStencilPixels	= NULL;
+   /* Color-index hooks are cleared as well. */
+   swdd->WriteCI8Span		= NULL;
+   swdd->WriteCI32Span		= NULL;
+   swdd->WriteMonoCISpan	= NULL;
+   swdd->WriteCI32Pixels	= NULL;
+   swdd->WriteMonoCIPixels	= NULL;
+   swdd->ReadCI32Span		= NULL;
+   swdd->ReadCI32Pixels		= NULL;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_span.h b/src/mesa/drivers/dri/mach64/mach64_span.h
new file mode 100644
index 0000000000..52883ba1e9
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_span.h
@@ -0,0 +1,39 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_SPAN_H__
+#define __MACH64_SPAN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void mach64DDInitSpanFuncs( GLcontext *ctx );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_state.c b/src/mesa/drivers/dri/mach64/mach64_state.c
new file mode 100644
index 0000000000..2c8e20d573
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_state.c
@@ -0,0 +1,1213 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "enums.h"
+#include "colormac.h"
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "tnl/t_pipeline.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+static void mach64UpdateAlphaMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint a = mmesa->setup.alpha_tst_cntl;
+   GLuint s = mmesa->setup.scale_3d_cntl;
+   GLuint m = mmesa->setup.dp_write_mask;
+   /* Rebuild alpha-test, blend and write-mask values from ctx->Color on local copies. */
+   if ( ctx->Color.AlphaEnabled ) {
+      GLubyte ref;
+
+      CLAMPED_FLOAT_TO_UBYTE(ref, ctx->Color.AlphaRef);
+
+      a &= ~(MACH64_ALPHA_TEST_MASK | MACH64_REF_ALPHA_MASK);
+
+      switch ( ctx->Color.AlphaFunc ) {
+      case GL_NEVER:
+	 a |= MACH64_ALPHA_TEST_NEVER;
+	 break;
+      case GL_LESS:
+	 a |= MACH64_ALPHA_TEST_LESS;
+         break;
+      case GL_LEQUAL:
+	 a |= MACH64_ALPHA_TEST_LEQUAL;
+	 break;
+      case GL_EQUAL:
+	 a |= MACH64_ALPHA_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 a |= MACH64_ALPHA_TEST_GEQUAL;
+	 break;
+      case GL_GREATER:
+	 a |= MACH64_ALPHA_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 a |= MACH64_ALPHA_TEST_NOTEQUAL;
+	 break;
+      case GL_ALWAYS:
+	 a |= MACH64_ALPHA_TEST_ALWAYS;
+	 break;
+      }
+
+      a |= (ref << MACH64_REF_ALPHA_SHIFT);
+      a |=  MACH64_ALPHA_TEST_EN;
+   } else {
+      a &= ~MACH64_ALPHA_TEST_EN;
+   }
+   /* Clear the blend-func fallback first; the switches below set it again if needed. */
+   FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_FALSE );
+
+   if ( ctx->Color.BlendEnabled ) {
+      s &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+	     MACH64_ALPHA_BLEND_DST_MASK |
+	     MACH64_ALPHA_BLEND_SAT);
+
+      switch ( ctx->Color.BlendSrcRGB ) {
+      case GL_ZERO:
+	 s |= MACH64_ALPHA_BLEND_SRC_ZERO;
+	 break;
+      case GL_ONE:
+	 s |= MACH64_ALPHA_BLEND_SRC_ONE;
+	 break;
+      case GL_DST_COLOR:
+	 s |= MACH64_ALPHA_BLEND_SRC_DSTCOLOR;
+	 break;
+      case GL_ONE_MINUS_DST_COLOR:
+	 s |= MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_SRC_SRCALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_SRC_INVSRCALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_SRC_DSTALPHA;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_SRC_INVDSTALPHA;
+	 break;
+      case GL_SRC_ALPHA_SATURATE:
+	 s |= (MACH64_ALPHA_BLEND_SRC_SRCALPHA |
+	       MACH64_ALPHA_BLEND_SAT);
+	 break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+      }
+
+      switch ( ctx->Color.BlendDstRGB ) {
+      case GL_ZERO:
+	 s |= MACH64_ALPHA_BLEND_DST_ZERO;
+	 break;
+      case GL_ONE:
+	 s |= MACH64_ALPHA_BLEND_DST_ONE;
+	 break;
+      case GL_SRC_COLOR:
+	 s |= MACH64_ALPHA_BLEND_DST_SRCCOLOR;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR:
+	 s |= MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_DST_SRCALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_DST_INVSRCALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_DST_DSTALPHA;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 s |= MACH64_ALPHA_BLEND_DST_INVDSTALPHA;
+	 break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+      }
+
+      m = 0xffffffff; /* Can't color mask and blend at the same time */
+      s &= ~MACH64_ALPHA_FOG_EN_FOG; /* Can't fog and blend at the same time */
+      s |=  MACH64_ALPHA_FOG_EN_ALPHA;
+   } else {
+      s &= ~MACH64_ALPHA_FOG_EN_ALPHA;
+   }
+   /* Write back only the values that changed and flag each for upload. */
+   if ( mmesa->setup.alpha_tst_cntl != a ) {
+      mmesa->setup.alpha_tst_cntl = a;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( mmesa->setup.scale_3d_cntl != s ) {
+      mmesa->setup.scale_3d_cntl = s;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( mmesa->setup.dp_write_mask != m ) {
+      mmesa->setup.dp_write_mask = m;
+      mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+}
+
+static void mach64DDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* func and ref are not read here: FLUSH_BATCH, then flag MACH64_NEW_ALPHA. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+static void mach64DDBlendEquationSeparate( GLcontext *ctx,
+					   GLenum modeRGB, GLenum modeA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int logicop_fallback, blend_eq_fallback;
+
+   assert( modeRGB == modeA );
+   FLUSH_BATCH( mmesa );
+
+   /* BlendEquation affects ColorLogicOpEnabled, so the logic-op
+    * fallback is re-evaluated here as well. */
+   logicop_fallback = ( ctx->Color.ColorLogicOpEnabled &&
+			ctx->Color.LogicOp != GL_COPY );
+   /* Can only do blend addition, not min, max, subtract, etc. */
+   blend_eq_fallback = ( modeRGB != GL_FUNC_ADD );
+
+   FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP, logicop_fallback );
+   FALLBACK( mmesa, MACH64_FALLBACK_BLEND_EQ, blend_eq_fallback );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+static void mach64DDBlendFuncSeparate( GLcontext *ctx,
+				       GLenum sfactorRGB, GLenum dfactorRGB,
+				       GLenum sfactorA, GLenum dfactorA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* The factor arguments are not read here: FLUSH_BATCH, then flag MACH64_NEW_ALPHA. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void mach64UpdateZMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint z = mmesa->setup.z_cntl;
+   /* Rebuild z_cntl (test function, enable bit, write mask) from ctx->Depth. */
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( ctx->Depth.Test ) {
+      z &= ~MACH64_Z_TEST_MASK;
+
+      switch ( ctx->Depth.Func ) {
+      case GL_NEVER:
+	 z |= MACH64_Z_TEST_NEVER;
+	 break;
+      case GL_ALWAYS:
+	 z |= MACH64_Z_TEST_ALWAYS;
+	 break;
+      case GL_LESS:
+	 z |= MACH64_Z_TEST_LESS;
+	 break;
+      case GL_LEQUAL:
+	 z |= MACH64_Z_TEST_LEQUAL;
+	 break;
+      case GL_EQUAL:
+	 z |= MACH64_Z_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 z |= MACH64_Z_TEST_GEQUAL;
+	 break;
+      case GL_GREATER:
+	 z |= MACH64_Z_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 z |= MACH64_Z_TEST_NOTEQUAL;
+	 break;
+      }
+
+      z |=  MACH64_Z_EN;
+   } else {
+      z &= ~MACH64_Z_EN;
+   }
+
+   if ( ctx->Depth.Mask ) {
+      z |=  MACH64_Z_MASK_EN;
+   } else {
+      z &= ~MACH64_Z_MASK_EN;
+   }
+   /* Write back and flag for upload only when the value changed. */
+   if ( mmesa->setup.z_cntl != z ) {
+      mmesa->setup.z_cntl = z;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+}
+
+static void mach64DDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* func is not read here: FLUSH_BATCH, then flag MACH64_NEW_DEPTH. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+static void mach64DDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* flag is not read here: FLUSH_BATCH, then flag MACH64_NEW_DEPTH. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+static void mach64DDClearDepth( GLcontext *ctx, GLclampd d )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* Always have a 16-bit depth buffer, so the clear value is d scaled
+    * by 0xffff and stored in ClearDepth. */
+   mmesa->ClearDepth = d * 0xffff;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+static void mach64UpdateFogAttrib( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   CARD32 s = mmesa->setup.scale_3d_cntl;
+   GLubyte c[4];
+   CARD32 col;
+   /* Rebuild the fog bits of scale_3d_cntl and the packed fog color. */
+   /* Can't fog if blending is on */
+   if ( ctx->Color.BlendEnabled )
+      return;
+
+   if ( ctx->Fog.Enabled ) {
+      s |= MACH64_ALPHA_FOG_EN_FOG;
+      s &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+	     MACH64_ALPHA_BLEND_DST_MASK |
+	     MACH64_ALPHA_BLEND_SAT);
+      /* From Utah-glx: "fog color is now dest and fog factor is alpha, so
+       * use GL_SRC_ALPHA GL_ONE_MINUS_SRC_ALPHA"
+       */
+      s |= (MACH64_ALPHA_BLEND_SRC_SRCALPHA | 
+	    MACH64_ALPHA_BLEND_DST_INVSRCALPHA);
+      /* From Utah-glx: "can't use texture alpha when fogging" */
+      s &= ~MACH64_TEX_MAP_AEN;
+   } else {
+      s &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+	     MACH64_ALPHA_BLEND_DST_MASK |
+	     MACH64_ALPHA_BLEND_SAT);
+      s |= (MACH64_ALPHA_BLEND_SRC_ONE | 
+	    MACH64_ALPHA_BLEND_DST_ZERO);
+      s &= ~MACH64_ALPHA_FOG_EN_FOG;
+   }
+   /* NOTE(review): FLOAT_TO_UBYTE here, CLAMPED_FLOAT_TO_UBYTE elsewhere in this file -- confirm Fog.Color is pre-clamped. */
+   c[0] = FLOAT_TO_UBYTE( ctx->Fog.Color[0] );
+   c[1] = FLOAT_TO_UBYTE( ctx->Fog.Color[1] );
+   c[2] = FLOAT_TO_UBYTE( ctx->Fog.Color[2] );
+   c[3] = FLOAT_TO_UBYTE( ctx->Fog.Color[3] );
+   /* The first argument is fixed at 4 rather than the screen's cpp. */
+   col = mach64PackColor( 4, c[0], c[1], c[2], c[3] );
+
+   if ( mmesa->setup.dp_fog_clr != col ) {
+      mmesa->setup.dp_fog_clr = col;
+      mmesa->dirty |= MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( mmesa->setup.scale_3d_cntl != s ) {
+      mmesa->setup.scale_3d_cntl = s;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+
+}
+
+static void mach64DDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* pname and param are not read here: FLUSH_BATCH, then flag MACH64_NEW_FOG. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_FOG;
+}
+
+
+/* =============================================================
+ * Clipping
+ */
+
+static void mach64UpdateClipping( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64ScreenPtr mach64Screen = mmesa->mach64Screen;
+   /* Recompute the scissor registers; does nothing while no drawable is bound. */
+   if ( mmesa->driDrawable ) {
+      __DRIdrawablePrivate *drawable = mmesa->driDrawable;
+      int x1 = 0;
+      int y1 = 0;
+      int x2 = drawable->w - 1;
+      int y2 = drawable->h - 1;
+      /* Start from the whole drawable; shrink to the scissor box with Y flipped against drawable->h. */
+      if ( ctx->Scissor.Enabled ) {
+	 if ( ctx->Scissor.X > x1 ) {
+	    x1 = ctx->Scissor.X;
+	 }
+	 if ( drawable->h - ctx->Scissor.Y - ctx->Scissor.Height > y1 ) {
+	    y1 = drawable->h - ctx->Scissor.Y - ctx->Scissor.Height;
+	 }
+	 if ( ctx->Scissor.X + ctx->Scissor.Width - 1 < x2 ) {
+	    x2 = ctx->Scissor.X + ctx->Scissor.Width - 1;
+	 }
+	 if ( drawable->h - ctx->Scissor.Y - 1 < y2 ) {
+	    y2 = drawable->h - ctx->Scissor.Y - 1;
+	 }
+      }
+      /* Offset by the drawable origin (drawable->x, drawable->y). */
+      x1 += drawable->x;
+      y1 += drawable->y;
+      x2 += drawable->x;
+      y2 += drawable->y;
+
+      /* clamp to screen borders */
+      if (x1 < 0) x1 = 0;
+      if (y1 < 0) y1 = 0;
+      if (x2 < 0) x2 = 0;
+      if (y2 < 0) y2 = 0;
+      if (x2 > mach64Screen->width-1) x2 = mach64Screen->width-1;
+      if (y2 > mach64Screen->height-1) y2 = mach64Screen->height-1;
+
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+	 fprintf( stderr, "%s: drawable %3d %3d %3d %3d\n",
+		  __FUNCTION__,
+		  drawable->x,
+		  drawable->y,
+		  drawable->w,
+		  drawable->h );
+	 fprintf( stderr, "%s:  scissor %3d %3d %3d %3d\n",
+		  __FUNCTION__,
+		  ctx->Scissor.X,
+		  ctx->Scissor.Y,
+		  ctx->Scissor.Width,
+		  ctx->Scissor.Height );
+	 fprintf( stderr, "%s:    final %3d %3d %3d %3d\n",
+		  __FUNCTION__, x1, y1, x2, y2 );
+	 fprintf( stderr, "\n" );
+      }
+      /* Pack each pair: first coordinate in the low bits, second shifted up by 16. */
+      mmesa->setup.sc_top_bottom = ((y1 << 0) |
+				    (y2 << 16));
+
+      mmesa->setup.sc_left_right = ((x1 << 0) |
+				    (x2 << 16));
+
+       /* UPLOAD_MISC reduces the dirty state, we just need to
+       * emit the scissor to the SAREA.  We need to dirty cliprects
+       * since the scissor and cliprects are intersected to update the
+       * single hardware scissor
+       */
+      mmesa->dirty |= MACH64_UPLOAD_MISC | MACH64_UPLOAD_CLIPRECTS;
+   }
+}
+
+static void mach64DDScissor( GLcontext *ctx,
+			     GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* x, y, w and h are not read here: FLUSH_BATCH, then flag MACH64_NEW_CLIP. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CLIP;
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+static void mach64UpdateCull( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLboolean ccw_is_front = ( ctx->Polygon.FrontFace == GL_CCW );
+   GLfloat sign;
+
+   /* Derive mmesa->backface_sign from the polygon culling state:
+    *    0  when culling is disabled, or when the cull mode is
+    *       GL_FRONT_AND_BACK or anything unrecognized;
+    *   -1  for GL_BACK with counter-clockwise front faces, and for
+    *       GL_FRONT with front faces that are not counter-clockwise;
+    *   +1  for the two remaining single-face combinations.
+    * How the sign is consumed is not visible in this file section.
+    */
+
+   if ( !ctx->Polygon.CullFlag ) {
+      sign = 0;
+   } else if ( ctx->Polygon.CullFaceMode == GL_BACK ) {
+      sign = ccw_is_front ? -1 : 1;
+   } else if ( ctx->Polygon.CullFaceMode == GL_FRONT ) {
+      sign = ccw_is_front ? 1 : -1;
+   } else {
+      /* GL_FRONT_AND_BACK, and any other value */
+      sign = 0;
+   }
+
+   mmesa->backface_sign = sign;
+}
+
+static void mach64DDCullFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* mode is not read here: FLUSH_BATCH, then flag MACH64_NEW_CULL. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+static void mach64DDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* mode is not read here: FLUSH_BATCH, then flag MACH64_NEW_CULL. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+
+static void mach64UpdateMasks( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint mask;
+   /* Recompute dp_write_mask from the GL color mask. */
+   if ( ctx->Color.BlendEnabled ) {
+      mask = 0xffffffff;	/* mach64 can't color mask with alpha blending enabled */
+   } else {
+      mask = mach64PackColor( mmesa->mach64Screen->cpp,
+			      ctx->Color.ColorMask[RCOMP],
+			      ctx->Color.ColorMask[GCOMP],
+			      ctx->Color.ColorMask[BCOMP],
+			      ctx->Color.ColorMask[ACOMP] );
+   }
+
+   if ( mmesa->setup.dp_write_mask == mask ) return;	/* nothing to upload */
+   mmesa->setup.dp_write_mask = mask;
+   mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+}
+
+static void mach64DDColorMask( GLcontext *ctx,
+			       GLboolean r, GLboolean g,
+			       GLboolean b, GLboolean a )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* r, g, b and a are not read here: FLUSH_BATCH, then flag MACH64_NEW_MASKS. */
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_MASKS;
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+static void mach64UpdateSpecularLighting( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint a = mmesa->setup.alpha_tst_cntl;
+   /* Set MACH64_SPECULAR_LIGHT_EN only for separate specular color with lighting on. */
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR  &&
+        ctx->Light.Enabled ) {
+      a |=  MACH64_SPECULAR_LIGHT_EN;
+   } else {
+      a &= ~MACH64_SPECULAR_LIGHT_EN;
+   }
+   /* On change, flag the register for upload and also raise MACH64_NEW_CONTEXT. */
+   if ( mmesa->setup.alpha_tst_cntl != a ) {
+      mmesa->setup.alpha_tst_cntl = a;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+      mmesa->new_state |= MACH64_NEW_CONTEXT;
+   }
+}
+
+static void mach64DDLightModelfv( GLcontext *ctx, GLenum pname,
+				  const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* Only GL_LIGHT_MODEL_COLOR_CONTROL is handled; it is applied right away, not via new_state. */
+   if ( pname == GL_LIGHT_MODEL_COLOR_CONTROL ) {
+      FLUSH_BATCH( mmesa );
+      mach64UpdateSpecularLighting(ctx);
+   }
+}
+
+static void mach64DDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint shade_bits;
+   GLuint s;
+
+   /* Map the GL shade model onto the flat-shade field of setup_cntl;
+    * any other mode leaves the hardware state untouched.
+    */
+   if ( mode == GL_FLAT ) {
+      shade_bits = MACH64_FLAT_SHADE_VERTEX_3;
+   } else if ( mode == GL_SMOOTH ) {
+      shade_bits = MACH64_FLAT_SHADE_OFF;
+   } else {
+      return;
+   }
+
+   s = (mmesa->setup.setup_cntl & ~MACH64_FLAT_SHADE_MASK) | shade_bits;
+   /* Flush and mark the register dirty only when the value changes. */
+   if ( mmesa->setup.setup_cntl != s ) {
+      FLUSH_BATCH( mmesa );
+      mmesa->setup.setup_cntl = s;
+      mmesa->dirty |= MACH64_UPLOAD_SETUP_CNTL;
+   }
+}
+
+
+/* =============================================================
+ * Viewport
+ */
+
+
+void mach64CalcViewport( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   GLfloat *m = mmesa->hw_viewport;
+   /* Derive hw_viewport from Mesa's window map: Y negated and offset by the drawable height, */
+   /* X/Y offset by drawX/drawY plus SUBPIXEL_X/Y, Z scaled by depth_scale.
+    * See also mach64_translate_vertex. */
+   m[MAT_SX] =   v[MAT_SX];
+   m[MAT_TX] =   v[MAT_TX] + (GLfloat)mmesa->drawX + SUBPIXEL_X;
+   m[MAT_SY] = - v[MAT_SY];
+   m[MAT_TY] = - v[MAT_TY] + mmesa->driDrawable->h + (GLfloat)mmesa->drawY + SUBPIXEL_Y;
+   m[MAT_SZ] =   v[MAT_SZ] * mmesa->depth_scale;
+   m[MAT_TZ] =   v[MAT_TZ] * mmesa->depth_scale;
+   /* All bits set in SetupNewInputs; presumably forces vertex setup to be redone -- confirm. */
+   mmesa->SetupNewInputs = ~0;
+}
+
+static void mach64Viewport( GLcontext *ctx,
+			  GLint x, GLint y,
+			  GLsizei width, GLsizei height )
+{
+   mach64CalcViewport( ctx );	/* x, y, width, height unused; values are re-read from ctx->Viewport */
+}
+
+static void mach64DepthRange( GLcontext *ctx,
+			    GLclampd nearval, GLclampd farval )
+{
+   mach64CalcViewport( ctx );	/* nearval, farval unused; values are re-read from ctx->Viewport */
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void mach64DDClearColor( GLcontext *ctx,
+				const GLfloat color[4] )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLubyte c[4];
+   
+   CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
+   /* Pack with the screen's bytes-per-pixel and store the result in ClearColor. */
+   mmesa->ClearColor = mach64PackColor( mmesa->mach64Screen->cpp,
+					c[0], c[1], c[2], c[3] );
+}
+
+static void mach64DDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   
+   if ( ctx->Color.ColorLogicOpEnabled ) {
+      FLUSH_BATCH( mmesa );
+      /* Any opcode other than GL_COPY sets the logic-op fallback; GL_COPY clears it. */
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP, opcode != GL_COPY);
+   }
+}
+
+void mach64SetCliprects( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = mmesa->driDrawable;
+   GLboolean use_back;
+
+   /* Pick the cliprect list for the requested buffer.  The back buffer
+    * uses its own list only when numBackClipRects is non-zero; otherwise
+    * it shares the front list and origin.  Other modes are ignored.
+    */
+   if ( mode == GL_FRONT_LEFT ) {
+      use_back = GL_FALSE;
+   } else if ( mode == GL_BACK_LEFT ) {
+      use_back = ( dPriv->numBackClipRects != 0 );
+   } else {
+      return;
+   }
+
+   if ( use_back ) {
+      mmesa->numClipRects = dPriv->numBackClipRects;
+      mmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pBackClipRects;
+      mmesa->drawX = dPriv->backX;
+      mmesa->drawY = dPriv->backY;
+   } else {
+      mmesa->numClipRects = dPriv->numClipRects;
+      mmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
+      mmesa->drawX = dPriv->x;
+      mmesa->drawY = dPriv->y;
+   }
+
+   mach64UpdateClipping( ctx );
+   mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+}
+
+static void mach64DDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+
+   /*
+    * _DrawDestMask is easier to cope with than <mode>.
+    */
+   switch ( ctx->Color._DrawDestMask ) {
+   case FRONT_LEFT_BIT:
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_FRONT_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: FRONT_LEFT_BIT\n", __FUNCTION__);
+      break;
+   case BACK_LEFT_BIT:
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_BACK_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BACK_LEFT_BIT\n", __FUNCTION__);
+      break;
+   default:
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: fallback (mode=%d)\n", __FUNCTION__, mode);
+      break;
+   }
+
+   /* We want to update the s/w rast state too so that mach64DDSetBuffer()
+    * gets called.
+    */
+   _swrast_DrawBuffer(ctx, mode);
+   /* dst_off_pitch: drawPitch/8 shifted up by 22, OR'ed with drawOffset >> 3. */
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |
+				 (mmesa->drawOffset >> 3));
+
+   mmesa->dirty |= MACH64_UPLOAD_DST_OFF_PITCH;
+}
+
+static void mach64DDReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* Intentionally empty until h/w glRead/CopyPixels or CopyTexImage is implemented. */
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void mach64DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{  /* Driver.Enable hook: <cap> is the capability being toggled, <state> its new value. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s = %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+   }
+
+   switch ( cap ) {  /* most cases only set a MACH64_NEW_* bit; mach64DDUpdateHWState acts on it later */
+   case GL_ALPHA_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+      break;
+
+   case GL_BLEND:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+
+      /* enable(GL_BLEND) affects ColorLogicOpEnabled.
+       */
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+
+   case GL_CULL_FACE:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_CULL;
+      break;
+
+   case GL_DEPTH_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_DEPTH;
+      break;
+
+   case GL_DITHER:
+      do {  /* do/while(0) only provides a scope for the local <s> */
+	 GLuint s = mmesa->setup.scale_3d_cntl;
+	 FLUSH_BATCH( mmesa );
+
+	 if ( ctx->Color.DitherFlag ) {  /* reads the context flag, not <state>; presumably already updated by core Mesa -- TODO confirm */
+	    /* Dithering causes problems w/ 24bpp depth */
+	    if ( mmesa->mach64Screen->cpp == 4 )
+	       s |=  MACH64_ROUND_EN;
+	    else
+	       s |=  MACH64_DITHER_EN;
+	 } else {
+	    s &= ~MACH64_DITHER_EN;
+	    s &= ~MACH64_ROUND_EN;
+	 }
+
+	 if ( mmesa->setup.scale_3d_cntl != s ) {  /* only flag an upload when the register value changed */
+	    mmesa->setup.scale_3d_cntl = s;
+	    mmesa->dirty |= ( MACH64_UPLOAD_SCALE_3D_CNTL );
+	 }
+      } while (0);
+      break;
+
+   case GL_FOG:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_FOG;
+      break;
+
+   case GL_INDEX_LOGIC_OP:
+   case GL_COLOR_LOGIC_OP:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		state && ctx->Color.LogicOp != GL_COPY );  /* any logic op other than GL_COPY goes to software */
+      break;
+
+   case GL_LIGHTING:
+      mach64UpdateSpecularLighting(ctx);  /* no FLUSH_BATCH on this path */
+      break;
+
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->scissor = state;
+      mmesa->new_state |= MACH64_NEW_CLIP;
+      break;
+
+   case GL_STENCIL_TEST:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_STENCIL, state );  /* stencil is always a software fallback */
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_TEXTURE;
+      break;
+
+   default:  /* every other capability is ignored by the driver */
+      return;
+   }
+}
+
+/* =============================================================
+ * Render mode
+ */
+
+static void mach64DDRenderMode( GLcontext *ctx, GLenum mode )
+{  /* Driver.RenderMode hook: any mode other than GL_RENDER sets the render-mode fallback bit. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   FALLBACK( mmesa, MACH64_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
+}
+
+/* =============================================================
+ * State initialization, management
+ */
+
+static void mach64DDPrintDirty( const char *msg, GLuint state )
+{  /* Debug helper: print <msg>, the raw mask, and a name for each MACH64_UPLOAD_* bit set in <state>. */
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    state,
+	    (state & MACH64_UPLOAD_DST_OFF_PITCH) ? "dst_off_pitch, " : "",  /* one %s per flag, twelve in total */
+	    (state & MACH64_UPLOAD_Z_ALPHA_CNTL)  ? "z_alpha_cntl, " : "",
+	    (state & MACH64_UPLOAD_SCALE_3D_CNTL) ? "scale_3d_cntl, " : "",
+	    (state & MACH64_UPLOAD_DP_FOG_CLR)    ? "dp_fog_clr, " : "",
+	    (state & MACH64_UPLOAD_DP_WRITE_MASK) ? "dp_write_mask, " : "",
+	    (state & MACH64_UPLOAD_DP_PIX_WIDTH)  ? "dp_pix_width, " : "",
+	    (state & MACH64_UPLOAD_SETUP_CNTL)    ? "setup_cntl, " : "",
+	    (state & MACH64_UPLOAD_MISC)          ? "misc, " : "",
+	    (state & MACH64_UPLOAD_TEXTURE)       ? "texture, " : "",
+	    (state & MACH64_UPLOAD_TEX0IMAGE)     ? "tex0 image, " : "",
+	    (state & MACH64_UPLOAD_TEX1IMAGE)     ? "tex1 image, " : "",
+	    (state & MACH64_UPLOAD_CLIPRECTS)     ? "cliprects, " : "" );
+}
+
+/*
+ * Load the current context's state into the hardware.
+ *
+ * NOTE: Be VERY careful about ensuring the context state is marked for
+ * upload, the only place it shouldn't be uploaded is when the setup
+ * state has changed in ReducedPrimitiveChange as this comes right after
+ * a state update.
+ *
+ * Blits of any type should always upload the context and masks after
+ * they are done.
+ */
+void mach64EmitHwStateLocked( mach64ContextPtr mmesa )
+{  /* Copies dirty driver state into the SAREA; presumably the caller holds the hardware lock -- TODO confirm. */
+   ATISAREAPrivPtr sarea = mmesa->sarea;
+   mach64_context_regs_t *regs = &(mmesa->setup);
+   mach64TexObjPtr t0 = mmesa->CurrentTexObj[0];
+   mach64TexObjPtr t1 = mmesa->CurrentTexObj[1];
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      mach64DDPrintDirty( __FUNCTION__, mmesa->dirty );
+   }
+
+   if ( t0 && t1 && mmesa->mach64Screen->numTexHeaps > 1 ) {  /* both units bound and more than one heap */
+      if (t0->heap != t1->heap || 
+	     (mmesa->dirty & MACH64_UPLOAD_TEX0IMAGE) ||
+	     (mmesa->dirty & MACH64_UPLOAD_TEX1IMAGE))
+	 mach64UploadMultiTexImages( mmesa, t0, t1 );  /* upload the pair together */
+   } else {
+      if ( mmesa->dirty & MACH64_UPLOAD_TEX0IMAGE ) {
+	 if ( t0 ) mach64UploadTexImages( mmesa, t0 );
+      }
+      if ( mmesa->dirty & MACH64_UPLOAD_TEX1IMAGE ) {
+	 if ( t1 ) mach64UploadTexImages( mmesa, t1 );
+      }
+   }
+
+   if ( mmesa->dirty & (MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC) ) {
+      memcpy( &sarea->ContextState, regs,
+	      MACH64_NR_CONTEXT_REGS * sizeof(GLuint) );  /* first MACH64_NR_CONTEXT_REGS words of the setup block */
+   }
+
+   if ( mmesa->dirty & MACH64_UPLOAD_TEXTURE ) {
+      mach64EmitTexStateLocked( mmesa, t0, t1 );
+   }
+
+   sarea->vertsize = mmesa->vertex_size;
+
+   /* Turn off the texture cache flushing.
+    */
+   mmesa->setup.tex_cntl &= ~MACH64_TEX_CACHE_FLUSH;  /* cleared only in the driver copy, after the copies above */
+
+   sarea->dirty |= mmesa->dirty;
+
+   mmesa->dirty &= MACH64_UPLOAD_CLIPRECTS;  /* every bit except the cliprect one is now clean */
+}
+
+static void mach64DDPrintState( const char *msg, GLuint flags )
+{  /* Debug helper: print <msg>, the raw mask, and a name for each MACH64_NEW_* bit set in <flags>. */
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    flags,
+	    (flags & MACH64_NEW_CONTEXT)	? "context, " : "",  /* one %s per flag, nine in total */
+	    (flags & MACH64_NEW_ALPHA)		? "alpha, " : "",
+	    (flags & MACH64_NEW_DEPTH)		? "depth, " : "",
+	    (flags & MACH64_NEW_FOG)		? "fog, " : "",
+	    (flags & MACH64_NEW_CLIP)		? "clip, " : "",
+	    (flags & MACH64_NEW_TEXTURE)	? "texture, " : "",
+	    (flags & MACH64_NEW_CULL)		? "cull, " : "",
+	    (flags & MACH64_NEW_MASKS)		? "masks, " : "",
+	    (flags & MACH64_NEW_WINDOW)		? "window, " : "" );
+}
+
+/* Update the hardware state: run the handler for each pending MACH64_NEW_* bit */
+void mach64DDUpdateHWState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int new_state = mmesa->new_state;  /* snapshot; the context field is cleared below before the handlers run */
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( new_state )
+   {
+      FLUSH_BATCH( mmesa );
+
+      mmesa->new_state = 0;  /* bits raised by the handlers themselves survive for the next call */
+
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+	 mach64DDPrintState( __FUNCTION__, new_state );
+
+      /* Update the various parts of the context's state.
+       */
+      if ( new_state & MACH64_NEW_ALPHA )
+	 mach64UpdateAlphaMode( ctx );
+
+      if ( new_state & MACH64_NEW_DEPTH )
+	 mach64UpdateZMode( ctx );
+
+      if ( new_state & MACH64_NEW_FOG )
+	 mach64UpdateFogAttrib( ctx );
+
+      if ( new_state & MACH64_NEW_CLIP )
+	 mach64UpdateClipping( ctx );
+
+      if ( new_state & MACH64_NEW_WINDOW )
+	 mach64CalcViewport( ctx );
+
+      if ( new_state & MACH64_NEW_CULL )
+	 mach64UpdateCull( ctx );
+
+      if ( new_state & MACH64_NEW_MASKS )
+	 mach64UpdateMasks( ctx );
+
+      if ( new_state & MACH64_NEW_TEXTURE )
+	 mach64UpdateTextureState( ctx );
+   }
+}
+
+
+static void mach64DDInvalidateState( GLcontext *ctx, GLuint new_state )
+{  /* Driver.UpdateState hook: forward <new_state> to the helper modules and remember it. */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   MACH64_CONTEXT(ctx)->NewGLState |= new_state;  /* accumulated, not overwritten */
+}
+
+
+/* Initialize the context's hardware state (mmesa->setup) and flag all of it as new */
+void mach64DDInitState( mach64ContextPtr mmesa )
+{
+   GLuint format;
+
+   switch ( mmesa->mach64Screen->cpp ) {  /* bytes per pixel of the screen */
+   case 2:
+      format = MACH64_DATATYPE_RGB565;
+      break;
+   case 4:
+      format = MACH64_DATATYPE_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );  /* terminates the whole process, not just context creation */
+   }
+
+   /* Always have a 16-bit depth buffer
+    * but Z coordinates are specified in 16.1 format to the setup engine.
+    */
+   mmesa->depth_scale = 2.0;
+
+   mmesa->ClearColor = 0x00000000;
+   mmesa->ClearDepth = 0x0000ffff;
+
+   mmesa->Fallback = 0;
+
+   if ( mmesa->glCtx->Visual.doubleBufferMode ) {  /* double-buffered visuals start out on the back buffer */
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->backOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->backPitch;
+   } else {
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->frontOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->frontPitch;
+   }
+
+   /* Hardware state:
+    */
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |  /* same packing as in mach64DDDrawBuffer */
+				 (mmesa->drawOffset >> 3));
+
+   mmesa->setup.z_off_pitch = (((mmesa->mach64Screen->depthPitch/8) << 22) |
+			       (mmesa->mach64Screen->depthOffset >> 3));
+
+   mmesa->setup.z_cntl = (MACH64_Z_TEST_LESS |
+			  MACH64_Z_MASK_EN);
+
+   mmesa->setup.alpha_tst_cntl = (MACH64_ALPHA_TEST_ALWAYS |
+				  MACH64_ALPHA_DST_SRCALPHA |
+				  MACH64_ALPHA_TST_SRC_TEXEL |
+				  (0 << MACH64_REF_ALPHA_SHIFT));
+
+   mmesa->setup.scale_3d_cntl = (MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
+				 /*  MACH64_SCALE_DITHER_ERROR_DIFFUSE | */
+				 MACH64_SCALE_DITHER_2D_TABLE |
+				 /*  MACH64_DITHER_INIT_CURRENT | */
+				 MACH64_DITHER_INIT_RESET |
+				 MACH64_SCALE_3D_FCN_SHADE |
+				 MACH64_ALPHA_FOG_DIS |
+				 MACH64_ALPHA_BLEND_SRC_ONE |
+				 MACH64_ALPHA_BLEND_DST_ZERO |
+				 MACH64_TEX_LIGHT_FCN_MODULATE |
+				 MACH64_MIP_MAP_DISABLE |
+				 MACH64_BILINEAR_TEX_EN |
+				 MACH64_TEX_BLEND_FCN_LINEAR);
+
+   /* GL spec says dithering initially enabled, but dithering causes
+    * problems w/ 24bpp depth (same choice as the GL_DITHER case of mach64DDEnable)
+    */
+   if ( mmesa->mach64Screen->cpp == 4 )
+      mmesa->setup.scale_3d_cntl |= MACH64_ROUND_EN;
+   else
+      mmesa->setup.scale_3d_cntl |= MACH64_DITHER_EN;
+
+   mmesa->setup.sc_left_right = 0x1fff0000;
+   mmesa->setup.sc_top_bottom = 0x3fff0000;
+
+   mmesa->setup.dp_fog_clr    = 0x00ffffff;
+   mmesa->setup.dp_write_mask = 0xffffffff;
+
+   mmesa->setup.dp_pix_width = ((format << 0) |  /* the same datatype code is replicated into five fields */
+				(format << 4) |
+				(format << 8) |
+				(format << 16) |
+				(format << 28));
+
+   mmesa->setup.dp_mix = (MACH64_BKGD_MIX_S |
+			  MACH64_FRGD_MIX_S);
+   mmesa->setup.dp_src = (MACH64_BKGD_SRC_3D |
+			  MACH64_FRGD_SRC_3D |
+			  MACH64_MONO_SRC_ONE);
+
+   mmesa->setup.clr_cmp_cntl  = 0x00000000;
+   mmesa->setup.gui_traj_cntl = (MACH64_DST_X_LEFT_TO_RIGHT |
+				 MACH64_DST_Y_TOP_TO_BOTTOM);
+
+   mmesa->setup.setup_cntl = (MACH64_FLAT_SHADE_OFF |
+			      MACH64_SOLID_MODE_OFF |
+			      MACH64_LOG_MAX_INC_ADJ);
+   mmesa->setup.setup_cntl = 0;  /* NOTE(review): overwrites the flags assigned just above -- confirm which value is intended */
+
+   mmesa->setup.tex_size_pitch = 0x00000000;
+
+   mmesa->setup.tex_cntl = ((0 << MACH64_LOD_BIAS_SHIFT) |
+			    (0 << MACH64_COMP_FACTOR_SHIFT) |
+			    MACH64_COMP_COMBINE_MODULATE |
+			    MACH64_COMP_BLEND_NEAREST |
+			    MACH64_COMP_FILTER_NEAREST |
+			    /* MACH64_TEXTURE_TILING | */
+#ifdef MACH64_PREMULT_TEXCOORDS
+			    MACH64_TEX_ST_DIRECT | 
+#endif
+			    MACH64_TEX_SRC_LOCAL |
+			    MACH64_TEX_UNCOMPRESSED |
+			    MACH64_TEX_CACHE_FLUSH |  /* mach64EmitHwStateLocked clears this bit again after emitting */
+			    MACH64_TEX_CACHE_SIZE_4K);
+
+   mmesa->setup.secondary_tex_off = 0x00000000;
+   mmesa->setup.tex_offset = 0x00000000;
+
+   mmesa->new_state = MACH64_NEW_ALL;  /* makes the next mach64DDUpdateHWState run every handler */
+}
+
+/* Initialize the driver's state functions: fill in the ctx->Driver state hooks.
+  */
+void mach64DDInitStateFuncs( GLcontext *ctx )
+{
+   ctx->Driver.UpdateState		= mach64DDInvalidateState;
+
+   ctx->Driver.ClearIndex		= NULL;  /* NULL entries: no driver-specific handler installed */
+   ctx->Driver.ClearColor		= mach64DDClearColor;
+   ctx->Driver.DrawBuffer		= mach64DDDrawBuffer;
+   ctx->Driver.ReadBuffer		= mach64DDReadBuffer;
+
+   ctx->Driver.IndexMask		= NULL;
+   ctx->Driver.ColorMask		= mach64DDColorMask;
+   ctx->Driver.AlphaFunc		= mach64DDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate	= mach64DDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate	= mach64DDBlendFuncSeparate;
+   ctx->Driver.ClearDepth		= mach64DDClearDepth;
+   ctx->Driver.CullFace			= mach64DDCullFace;
+   ctx->Driver.FrontFace		= mach64DDFrontFace;
+   ctx->Driver.DepthFunc		= mach64DDDepthFunc;
+   ctx->Driver.DepthMask		= mach64DDDepthMask;
+   ctx->Driver.Enable			= mach64DDEnable;
+   ctx->Driver.Fogfv			= mach64DDFogfv;
+   ctx->Driver.Hint			= NULL;
+   ctx->Driver.Lightfv			= NULL;
+   ctx->Driver.LightModelfv		= mach64DDLightModelfv;
+   ctx->Driver.LogicOpcode		= mach64DDLogicOpCode;
+   ctx->Driver.PolygonMode		= NULL;
+   ctx->Driver.PolygonStipple		= NULL;
+   ctx->Driver.RenderMode		= mach64DDRenderMode;
+   ctx->Driver.Scissor			= mach64DDScissor;
+   ctx->Driver.ShadeModel		= mach64DDShadeModel;
+   ctx->Driver.ClearStencil		= NULL;  /* stencil hooks stay empty; enabling stencil forces a fallback in mach64DDEnable */
+   ctx->Driver.StencilFunc		= NULL;
+   ctx->Driver.StencilMask		= NULL;
+   ctx->Driver.StencilOp		= NULL;
+   
+   ctx->Driver.DepthRange		= mach64DepthRange;
+   ctx->Driver.Viewport			= mach64Viewport;
+   
+   /* Pixel path fallbacks: all handled by swrast.
+    */
+   ctx->Driver.Accum = _swrast_Accum;
+   ctx->Driver.Bitmap = _swrast_Bitmap;
+   ctx->Driver.CopyPixels = _swrast_CopyPixels;
+   ctx->Driver.DrawPixels = _swrast_DrawPixels;
+   ctx->Driver.ReadPixels = _swrast_ReadPixels;
+
+   /* Swrast hooks for imaging extensions:
+    */
+   ctx->Driver.CopyColorTable = _swrast_CopyColorTable;
+   ctx->Driver.CopyColorSubTable = _swrast_CopyColorSubTable;
+   ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
+   ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_state.h b/src/mesa/drivers/dri/mach64/mach64_state.h
new file mode 100644
index 0000000000..41d78716f5
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_state.h
@@ -0,0 +1,50 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_STATE_H__
+#define __MACH64_STATE_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "mach64_context.h"
+
+extern void mach64DDInitState( mach64ContextPtr mmesa );  /* fills mmesa->setup with defaults */
+extern void mach64DDInitStateFuncs( GLcontext *ctx );  /* installs the ctx->Driver state hooks */
+
+extern void mach64SetCliprects( GLcontext *ctx, GLenum mode );
+extern void mach64CalcViewport( GLcontext *ctx );
+
+extern void mach64DDUpdateState( GLcontext *ctx );  /* NOTE(review): definition not seen in the reviewed part of mach64_state.c -- confirm it exists */
+extern void mach64DDUpdateHWState( GLcontext *ctx );  /* applies pending MACH64_NEW_* state */
+
+extern void mach64EmitHwStateLocked( mach64ContextPtr mmesa );  /* copies dirty state to the SAREA */
+
+#endif /* GLX_DIRECT_RENDERING */
+#endif /* __MACH64_STATE_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.c b/src/mesa/drivers/dri/mach64/mach64_tex.c
new file mode 100644
index 0000000000..02d76067ae
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tex.c
@@ -0,0 +1,605 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "imports.h"
+
+
+static void mach64SetTexWrap( mach64TexObjPtr t,
+			      GLenum swrap, GLenum twrap )
+{
+   /* The texture object only records clamp-vs-repeat per axis, so every
+    * clamp variant maps to GL_TRUE.  A wrap mode outside the ones tested
+    * here leaves the stored flag as it was.
+    */
+   if ( swrap == GL_REPEAT ) {
+      t->ClampS = GL_FALSE;
+   }
+   else if ( swrap == GL_CLAMP ||
+	     swrap == GL_CLAMP_TO_EDGE ||
+	     swrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampS = GL_TRUE;
+   }
+
+   if ( twrap == GL_REPEAT ) {
+      t->ClampT = GL_FALSE;
+   }
+   else if ( twrap == GL_CLAMP ||
+	     twrap == GL_CLAMP_TO_EDGE ||
+	     twrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampT = GL_TRUE;
+   }
+}
+
+static void mach64SetTexFilter( mach64TexObjPtr t,
+				GLenum minf, GLenum magf )
+{
+   /* Only a bilinear on/off flag is stored per filter, so the mipmap
+    * part of a minification mode is dropped.  An enum outside the
+    * lists below leaves the corresponding flag unchanged.
+    */
+   if ( minf == GL_NEAREST ||
+	minf == GL_NEAREST_MIPMAP_NEAREST ||
+	minf == GL_NEAREST_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_FALSE;
+   }
+   else if ( minf == GL_LINEAR ||
+	     minf == GL_LINEAR_MIPMAP_NEAREST ||
+	     minf == GL_LINEAR_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_TRUE;
+   }
+
+   if ( magf == GL_NEAREST ) {
+      t->BilinearMag = GL_FALSE;
+   }
+   else if ( magf == GL_LINEAR ) {
+      t->BilinearMag = GL_TRUE;
+   }
+}
+
+static void mach64SetTexBorderColor( mach64TexObjPtr t, GLubyte c[4] )
+{  /* Currently a no-op: the only statement is compiled out, so <t> and <c> are unused. */
+#if 0
+   GLuint border = mach64PackColor( 4, c[0], c[1], c[2], c[3] );
+#endif
+}
+
+
+static mach64TexObjPtr
+mach64AllocTexObj( struct gl_texture_object *texObj )
+{  /* Allocate zeroed driver data for <texObj>; returns NULL when allocation fails. */
+   mach64TexObjPtr t;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *) texObj );  /* %p requires a void pointer */
+
+   t = (mach64TexObjPtr) CALLOC_STRUCT( mach64_texture_object );
+   if ( !t )
+      return NULL;
+
+   /* Initialize non-image-dependent parts of the state:
+    */
+   t->tObj = texObj;  /* back-pointer only; the caller stores t in texObj->DriverData */
+
+   t->offset = 0;
+
+   t->dirty = 1;
+
+   make_empty_list( t );
+
+   mach64SetTexWrap( t, texObj->WrapS, texObj->WrapT );
+   /*mach64SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );*/
+   mach64SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+   mach64SetTexBorderColor( t, texObj->_BorderChan );
+
+   return t;
+}
+
+
+/* Called by the _mesa_store_teximage[123]d() functions; maps <internalFormat> to a stored texel format. */
+static const struct gl_texture_format *
+mach64ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			   GLenum format, GLenum type )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   (void) format;
+   (void) type;  /* note: <type> is in fact read in the GL_YCBCR_MESA case below */
+
+   switch ( internalFormat ) {  /* on a 4-byte-per-pixel screen every RGB(A)/L/I/A format becomes argb8888 */
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case 4:
+   case GL_RGBA:
+   case GL_RGBA2:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888;
+      else
+         return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888;
+      else
+         return &_mesa_texformat_argb1555;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+   case GL_RGBA4:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888;
+      else
+         return &_mesa_texformat_argb4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888;
+      else
+         return &_mesa_texformat_rgb565;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888; /* inefficient but accurate */
+      else
+         return &_mesa_texformat_argb1555;
+
+   case GL_INTENSITY4:
+   case GL_INTENSITY:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      if (mmesa->mach64Screen->cpp == 4)
+         return &_mesa_texformat_argb8888; /* inefficient but accurate */
+      else
+         return &_mesa_texformat_argb4444;
+
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return &_mesa_texformat_ci8;  /* independent of screen depth */
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+          type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   default:
+      _mesa_problem( ctx, "unexpected format in %s", __FUNCTION__ );
+      return NULL;  /* callers must cope with a NULL format */
+   }
+}
+
+static void mach64TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{  /* Driver.TexImage1D hook: ensure driver data exists, then let core Mesa store the image. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr) texObj->DriverData;
+
+   if ( t ) {
+      mach64SwapOutTexObj( mmesa, t );  /* existing driver object: swap it out before the image changes */
+   }
+   else {
+      t = mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+      texObj->DriverData = t;
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat */
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+			   width, border, format, type,
+			   pixels, packing, texObj, texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;  /* picked up by mach64DDUpdateHWState */
+}
+
+static void mach64TexSubImage1D( GLcontext *ctx,
+				 GLenum target,
+				 GLint level,
+				 GLint xoffset,
+				 GLsizei width,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{  /* Driver.TexSubImage1D hook: swap out the driver object, then let core Mesa store the region. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t ) {
+      mach64SwapOutTexObj( mmesa, t );
+   }
+   else {  /* only reachable when the assert above is compiled out */
+      t = mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+      texObj->DriverData = t;
+   }
+
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+			     format, type, pixels, packing, texObj,
+			     texImage);
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;  /* picked up by mach64DDUpdateHWState */
+}
+
+static void mach64TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			      GLint internalFormat,
+			      GLint width, GLint height, GLint border,
+			      GLenum format, GLenum type, const GLvoid *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage )
+{  /* Driver.TexImage2D hook: ensure driver data exists, then let core Mesa store the image. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr) texObj->DriverData;
+
+   if ( t ) {
+      mach64SwapOutTexObj( mmesa, t );  /* existing driver object: swap it out before the image changes */
+   }
+   else {
+      t = mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+      texObj->DriverData = t;
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat */
+   _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			   width, height, border, format, type, pixels,
+			   packing, texObj, texImage );  /* honour the caller's unpack state, as the 1D path does */
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;  /* picked up by mach64DDUpdateHWState */
+}
+
+static void mach64TexSubImage2D( GLcontext *ctx,
+				 GLenum target,
+				 GLint level,
+				 GLint xoffset, GLint yoffset,
+				 GLsizei width, GLsizei height,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{  /* Driver.TexSubImage2D hook: swap out the driver object, then let core Mesa store the region. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t ) {
+      mach64SwapOutTexObj( mmesa, t );
+   }
+   else {  /* only reachable when the assert above is compiled out */
+      t = mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+      texObj->DriverData = t;
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;  /* picked up by mach64DDUpdateHWState */
+}
+
+/* Due to the way we must program texture state into the Rage Pro,
+ * we must leave these calculations to the absolute last minute.
+ * Called from mach64EmitHwStateLocked when MACH64_UPLOAD_TEXTURE is dirty. */
+void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+			       mach64TexObjPtr t0,
+			       mach64TexObjPtr t1 )
+{
+   ATISAREAPrivPtr sarea = mmesa->sarea;
+   mach64_context_regs_t *regs = &(mmesa->setup);
+
+   /* for multitex, both textures must be local or AGP */
+   if ( t0 && t1 )
+      assert(t0->heap == t1->heap);
+
+   if ( t0 ) {  /* the texture source bit follows unit 0's heap only */
+      if (t0->heap == MACH64_CARD_HEAP) {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_card++;
+#endif
+	 mmesa->setup.tex_cntl &= ~MACH64_TEX_SRC_AGP;
+      } else {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_agp++;
+#endif
+	 mmesa->setup.tex_cntl |= MACH64_TEX_SRC_AGP;
+      }
+      mmesa->setup.tex_offset = t0->offset;
+   }
+
+   if ( t1 ) {
+      mmesa->setup.secondary_tex_off = t1->offset;
+   }
+
+   memcpy( &sarea->ContextState.tex_size_pitch, &regs->tex_size_pitch,  /* copy starts at tex_size_pitch ... */
+	   MACH64_NR_TEXTURE_REGS * sizeof(GLuint) );                     /* ... and spans MACH64_NR_TEXTURE_REGS words */
+}
+
+
+/* ================================================================
+ * Device Driver API texture functions
+ */
+
+static void mach64DDTexEnv( GLcontext *ctx, GLenum target,
+			    GLenum pname, const GLfloat *param )
+{  /* Driver.TexEnv hook: only GL_TEXTURE_ENV_MODE is acted on; <target> and <param> are unused. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+#if 0
+   struct gl_texture_unit *texUnit;
+   GLubyte c[4];
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_MODE:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_TEXTURE | MACH64_NEW_ALPHA;  /* env mode change re-evaluates texture and alpha state */
+      break;
+
+#if 0
+   case GL_TEXTURE_ENV_COLOR:
+      texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+      CLAMPED_FLOAT_TO_UBYTE( c[0], texUnit->EnvColor[0] );
+      CLAMPED_FLOAT_TO_UBYTE( c[1], texUnit->EnvColor[1] );
+      CLAMPED_FLOAT_TO_UBYTE( c[2], texUnit->EnvColor[2] );
+      CLAMPED_FLOAT_TO_UBYTE( c[3], texUnit->EnvColor[3] );
+      mmesa->env_color = mach64PackColor( 32, c[0], c[1], c[2], c[3] );
+      if ( mmesa->setup.constant_color_c != mmesa->env_color ) {
+	 FLUSH_BATCH( mmesa );
+	 mmesa->setup.constant_color_c = mmesa->env_color;
+
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+
+	 /* More complex multitexture/multipass fallbacks for GL_BLEND
+	  * can be done later, but this allows a single pass GL_BLEND
+	  * in some cases (ie. Performer town demo).
+	  */
+	 mmesa->blend_flags &= ~MACH64_BLEND_ENV_COLOR;
+	 if ( mmesa->env_color != 0x00000000 &&
+	      mmesa->env_color != 0xff000000 &&
+	      mmesa->env_color != 0x00ffffff &&
+	      mmesa->env_color != 0xffffffff )) {	/* NOTE(review): unbalanced ')' -- fix before enabling this block */
+	    mmesa->blend_flags |= MACH64_BLEND_ENV_COLOR;
+	 }
+      }
+      break;
+#endif
+
+   default:
+      return;
+   }
+}
+
+static void mach64DDTexParameter( GLcontext *ctx, GLenum target,
+				  struct gl_texture_object *tObj,
+				  GLenum pname, const GLfloat *params )
+{  /* Driver.TexParameter hook: mirror filter/wrap/border changes into the driver texture object. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr)tObj->DriverData;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   if ( ( target != GL_TEXTURE_2D ) &&
+	( target != GL_TEXTURE_1D ) ) {
+      return;  /* only 1D and 2D targets are tracked */
+   }
+
+   if (!t) {  /* allocated before <pname> is examined, so even ignored parameters create driver data */
+      t = mach64AllocTexObj(tObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexParameter");
+         return;
+      }
+      tObj->DriverData = t;
+   }
+
+   switch ( pname ) {  /* values are re-read from <tObj>; <params> itself is not used */
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+      if ( t->bound ) FLUSH_BATCH( mmesa );
+      mach64SetTexFilter( t, tObj->MinFilter, tObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      if ( t->bound ) FLUSH_BATCH( mmesa );
+      mach64SetTexWrap( t, tObj->WrapS, tObj->WrapT );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      if ( t->bound ) FLUSH_BATCH( mmesa );
+      mach64SetTexBorderColor( t, tObj->_BorderChan );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+      /* From Radeon/Rage128:
+       * This isn't the most efficient solution but there doesn't appear to
+       * be a nice alternative.  Since there's no LOD clamping,
+       * we just have to rely on loading the right subset of mipmap levels
+       * to simulate a clamped LOD.
+       *
+       * For mach64 we're only concerned with the base level
+       * since that's the only texture we upload.
+       */
+      if ( t->bound ) FLUSH_BATCH( mmesa );
+      mach64SwapOutTexObj( mmesa, t );
+      break;
+
+   default:
+      return;  /* leaves new_state untouched */
+   }
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+static void mach64DDBindTexture( GLcontext *ctx, GLenum target,
+				 struct gl_texture_object *tObj )
+{  /* Driver.BindTexture hook: drop the driver's current object for the active unit. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLint unit = ctx->Texture.CurrentUnit;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p ) unit=%d\n",
+	       __FUNCTION__, (void *) tObj, unit );  /* %p requires a void pointer */
+   }
+
+   FLUSH_BATCH( mmesa );
+
+   if ( mmesa->CurrentTexObj[unit] ) {
+      mmesa->CurrentTexObj[unit]->bound &= ~(unit+1);  /* clear this unit's bit (unit+1) in the bound mask */
+      mmesa->CurrentTexObj[unit] = NULL;
+   }
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;  /* presumably <tObj> is attached later by mach64UpdateTextureState -- TODO confirm */
+}
+
+static void mach64DDDeleteTexture( GLcontext *ctx,
+				   struct gl_texture_object *tObj )
+{  /* Driver.DeleteTexture hook: unbind and destroy the driver data attached to <tObj>, if any. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr t = (mach64TexObjPtr)tObj->DriverData;
+
+   if ( t ) {
+      if ( t->bound && mmesa ) {
+	 FLUSH_BATCH( mmesa );
+
+	 mmesa->CurrentTexObj[t->bound-1] = 0;  /* NOTE(review): assumes bound is 1 or 2; a value of 3 would index past unit 1 -- confirm */
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+      }
+
+      mach64DestroyTexObj( mmesa, t );  /* called even when mmesa was found NULL above */
+      tObj->DriverData = NULL;
+   }
+}
+
+static GLboolean mach64DDIsTextureResident( GLcontext *ctx,
+					    struct gl_texture_object *tObj )
+{
+   const mach64TexObjPtr texData = (mach64TexObjPtr)tObj->DriverData;
+   /* Resident == driver data exists and currently holds a memBlock. */
+   return !texData ? GL_FALSE : ( texData->memBlock ? GL_TRUE : GL_FALSE );
+}
+
+
+void mach64DDInitTextureFuncs( GLcontext *ctx )
+{  /* Install the texture-related ctx->Driver hooks. */
+   ctx->Driver.TexEnv			= mach64DDTexEnv;
+   ctx->Driver.ChooseTextureFormat	= mach64ChooseTextureFormat;
+   ctx->Driver.TexImage1D		= mach64TexImage1D;
+   ctx->Driver.TexSubImage1D		= mach64TexSubImage1D;
+   ctx->Driver.TexImage2D		= mach64TexImage2D;
+   ctx->Driver.TexSubImage2D		= mach64TexSubImage2D;
+   ctx->Driver.TexImage3D               = _mesa_store_teximage3d;  /* 3D and CopyTex* paths use the core Mesa / swrast versions */
+   ctx->Driver.TexSubImage3D            = _mesa_store_texsubimage3d;
+   ctx->Driver.CopyTexImage1D           = _swrast_copy_teximage1d;
+   ctx->Driver.CopyTexImage2D           = _swrast_copy_teximage2d;
+   ctx->Driver.CopyTexSubImage1D        = _swrast_copy_texsubimage1d;
+   ctx->Driver.CopyTexSubImage2D        = _swrast_copy_texsubimage2d;
+   ctx->Driver.CopyTexSubImage3D        = _swrast_copy_texsubimage3d;
+   ctx->Driver.TexParameter		= mach64DDTexParameter;
+   ctx->Driver.BindTexture		= mach64DDBindTexture;
+   ctx->Driver.DeleteTexture		= mach64DDDeleteTexture;
+   ctx->Driver.UpdateTexturePalette	= NULL;  /* NULL entries: no driver-specific handler installed */
+   ctx->Driver.ActiveTexture		= NULL;
+   ctx->Driver.IsTextureResident	= mach64DDIsTextureResident;
+   ctx->Driver.PrioritizeTexture	= NULL;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.h b/src/mesa/drivers/dri/mach64/mach64_tex.h
new file mode 100644
index 0000000000..ecd36db325
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tex.h
@@ -0,0 +1,103 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TEX_H__
+#define __MACH64_TEX_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void mach64UpdateTextureState( GLcontext *ctx );
+
+extern void mach64SwapOutTexObj( mach64ContextPtr mach64ctx,
+				 mach64TexObjPtr t );
+
+extern void mach64UploadTexImages( mach64ContextPtr mach64ctx,
+				   mach64TexObjPtr t );
+
+extern void mach64UploadMultiTexImages( mach64ContextPtr mach64ctx,
+					mach64TexObjPtr t0, mach64TexObjPtr t1 );
+
+extern void mach64AgeTextures( mach64ContextPtr mach64ctx, int heap );
+extern void mach64DestroyTexObj( mach64ContextPtr mach64ctx,
+				 mach64TexObjPtr t );
+
+extern void mach64UpdateTexLRU( mach64ContextPtr mach64ctx,
+				mach64TexObjPtr t );
+
+extern void mach64PrintLocalLRU( mach64ContextPtr mach64ctx, int heap );
+extern void mach64PrintGlobalLRU( mach64ContextPtr mach64ctx, int heap );
+
+extern void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+				      mach64TexObjPtr t0,
+				      mach64TexObjPtr t1 );
+
+extern void mach64DDInitTextureFuncs( GLcontext *ctx );
+
+
+/* ================================================================
+ * Color conversion macros:
+ *
+ * Each macro packs 8-bit-per-channel components into one pixel value.
+ * Arguments are evaluated once each, but the 888 and 8888 variants do
+ * not mask their inputs, so callers must pass values in 0..255.
+ */
+
+/* 3:3:2 - red's top three bits stay in bits 7-5, green's top three
+ * bits move to bits 4-2, blue's top two bits to bits 1-0.
+ */
+#define MACH64PACKCOLOR332(r, g, b)					\
+   (((r) & 0xe0) | (((g) & 0xe0) >> 3) | (((b) & 0xc0) >> 6))
+
+/* 1:5:5:5 - top five bits of red/green/blue in bits 14-10, 9-5 and
+ * 4-0; bit 15 is set whenever `a' is non-zero.
+ */
+#define MACH64PACKCOLOR1555(r, g, b, a)					\
+   ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) |	\
+    ((a) ? 0x8000 : 0))
+
+/* 5:6:5 - top five bits of red in bits 15-11, top six bits of green in
+ * bits 10-5, top five bits of blue in bits 4-0.
+ */
+#define MACH64PACKCOLOR565(r, g, b)					\
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+/* 8:8:8 - red in bits 23-16, green in bits 15-8, blue in bits 7-0. */
+#define MACH64PACKCOLOR888(r, g, b)					\
+   (((r) << 16) | ((g) << 8) | (b))
+
+/* 8:8:8:8 - as 888 with alpha added in bits 31-24. */
+#define MACH64PACKCOLOR8888(r, g, b, a)					\
+   (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+/* 4:4:4:4 - top four bits of alpha/red/green in bits 15-12, 11-8 and
+ * 7-4; blue is shifted down into bits 3-0 without masking.
+ */
+#define MACH64PACKCOLOR4444(r, g, b, a)					\
+   ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+/* Pack 8-bit color components into a single pixel value.
+ *
+ * `cpp' selects the packing (presumably the bytes per pixel of the
+ * destination - confirm against callers): 2 uses the 565 layout and
+ * ignores `a', 4 uses the 8888 layout with alpha in the top byte.  Any
+ * other value returns 0.
+ */
+static __inline__ GLuint mach64PackColor( GLuint cpp,
+					  GLubyte r, GLubyte g,
+					  GLubyte b, GLubyte a )
+{
+   switch ( cpp ) {
+   case 2:
+      return MACH64PACKCOLOR565( r, g, b );
+   case 4:
+      /* Widen alpha to unsigned before it is shifted: a GLubyte
+       * promotes to a signed int, and shifting a value >= 0x80 left by
+       * 24 bits would overflow that int.
+       */
+      return MACH64PACKCOLOR8888( r, g, b, (GLuint) a );
+   default:
+      return 0;
+   }
+}
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_texmem.c b/src/mesa/drivers/dri/mach64/mach64_texmem.c
new file mode 100644
index 0000000000..44dd888a96
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_texmem.c
@@ -0,0 +1,867 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+ *                                                Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ *   José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+#include "context.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "imports.h"
+
+
+/* Destroy hardware state associated with texture `t'.
+ *
+ * Releases the texture-heap block (if any), clears the back-pointer
+ * held by the owning gl_texture_object, clears the context's
+ * CurrentTexObj[] slot when `t' is bound, unlinks `t' from the list it
+ * is on and frees it.  `t' may be NULL, in which case nothing is
+ * destroyed.  `mmesa' may be NULL; the steps that need a context are
+ * then skipped.
+ */
+void mach64DestroyTexObj( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter.  Note this happens even when `t' is
+    * NULL, because it precedes the NULL check below.
+    */
+   if (mmesa)
+      mmesa->c_textureSwaps++;
+#endif
+   if ( !t ) return;
+
+   /* Disabled: would also release the regions covered by this texture
+    * from the shared (global) LRU.
+    */
+#if 0
+   if ( t->tObj && t->memBlock && mmesa ) {
+      /* not a placeholder, so release from global LRU if necessary */
+      int heap = t->heap;
+      drmTextureRegion *list = mmesa->sarea->texList[heap];
+      int log2sz = mmesa->mach64Screen->logTexGranularity[heap];
+      int start = t->memBlock->ofs >> log2sz;
+      int end = (t->memBlock->ofs + t->memBlock->size - 1) >> log2sz;
+      int i;
+
+      mmesa->lastTexAge[heap] = ++mmesa->sarea->texAge[heap];
+
+      /* Update the global LRU */
+      for ( i = start ; i <= end ; i++ ) {
+	 /* do we own this block? */
+	 if (list[i].in_use == mmesa->hHWContext) {
+	    list[i].in_use = 0;
+	    list[i].age = mmesa->lastTexAge[heap];
+
+	    /* remove_from_list(i) */
+	    list[(GLuint)list[i].next].prev = list[i].prev;
+	    list[(GLuint)list[i].prev].next = list[i].next;
+	 }
+      }
+   }
+#endif
+
+   /* Give the heap block back to the memory manager. */
+   if ( t->memBlock ) {
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+   }
+
+   /* Placeholders have no tObj; real textures must forget about us. */
+   if ( t->tObj ) {
+      t->tObj->DriverData = NULL;
+   }
+
+   /* t->bound is used as a 1-based index into CurrentTexObj[]. */
+   if ( t->bound && mmesa )
+      mmesa->CurrentTexObj[t->bound-1] = NULL;
+
+   remove_from_list( t );
+   FREE( t );
+}
+
+/* Keep track of swapped out texture objects.
+ *
+ * Frees the texture-heap block held by `t' (if any), marks every image
+ * of `t' dirty so it is uploaded again the next time it is needed, and
+ * moves `t' to the tail of the context's SwappedOut list.  The texture
+ * object itself stays alive.
+ *
+ * NOTE(review): `mmesa' is checked for NULL around the performance
+ * counter but dereferenced unconditionally by move_to_tail() below, and
+ * `t' is never checked - callers presumably always pass valid pointers;
+ * confirm.
+ */
+void mach64SwapOutTexObj( mach64ContextPtr mmesa,
+			  mach64TexObjPtr t )
+{
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   if (mmesa)
+     mmesa->c_textureSwaps++;
+#endif
+
+   /* Disabled: would also release the regions covered by this texture
+    * from the shared (global) LRU.
+    */
+#if 0
+   if ( t->tObj && t->memBlock && mmesa ) {
+      /* not a placeholder, so release from global LRU if necessary */
+      int heap = t->heap;
+      drmTextureRegion *list = mmesa->sarea->texList[heap];
+      int log2sz = mmesa->mach64Screen->logTexGranularity[heap];
+      int start = t->memBlock->ofs >> log2sz;
+      int end = (t->memBlock->ofs + t->memBlock->size - 1) >> log2sz;
+      int i;
+
+      mmesa->lastTexAge[heap] = ++mmesa->sarea->texAge[heap];
+
+      /* Update the global LRU */
+      for ( i = start ; i <= end ; i++ ) {
+	 /* do we own this block? */
+	 if (list[i].in_use == mmesa->hHWContext) {
+	    list[i].in_use = 0;
+	    list[i].age = mmesa->lastTexAge[heap];
+
+	    /* remove_from_list(i) */
+	    list[(GLuint)list[i].next].prev = list[i].prev;
+	    list[(GLuint)list[i].prev].next = list[i].next;
+	 }
+      }
+   }
+#endif
+
+   /* Give the heap block back to the memory manager. */
+   if ( t->memBlock ) {
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+   }
+
+   /* All bits set: everything must be re-uploaded. */
+   t->dirty = ~0;
+   move_to_tail( &mmesa->SwappedOut, t );
+}
+
+/* Debugging aid: dump this context's local LRU list for texture heap
+ * `heap' to stderr.  Entries without a gl_texture_object are reported
+ * as placeholders together with their region index; real textures are
+ * reported with their bound unit.  Every entry on the list is expected
+ * to own a heap block.
+ */
+void mach64PrintLocalLRU( mach64ContextPtr mmesa, int heap )
+{
+   const int granularity = 1 << (mmesa->mach64Screen->logTexGranularity[heap]);
+   mach64TexObjPtr cur;
+
+   fprintf( stderr, "\nLocal LRU, heap %d:\n", heap );
+
+   foreach( cur, &mmesa->TexObjList[heap] ) {
+      if ( cur->tObj ) {
+	 /* One of our own textures. */
+	 fprintf( stderr, "Texture (bound %d) at 0x%x sz 0x%x\n",
+		  cur->bound,
+		  cur->memBlock->ofs,
+		  cur->memBlock->size );
+      } else {
+	 /* Stand-in for memory owned by another client. */
+	 fprintf( stderr, "Placeholder %d at 0x%x sz 0x%x\n",
+		  cur->memBlock->ofs / granularity,
+		  cur->memBlock->ofs,
+		  cur->memBlock->size );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+/* Debugging aid: dump the shared (global) LRU list for texture heap
+ * `heap' to stderr.  The list is walked through its `next' links
+ * starting at the sentinel entry MACH64_NR_TEX_REGIONS.  If the walk
+ * does not return to the sentinel within MACH64_NR_TEX_REGIONS steps, a
+ * loop is reported and the regions are dumped in index order instead.
+ */
+void mach64PrintGlobalLRU( mach64ContextPtr mmesa, int heap )
+{
+   drmTextureRegion *list = mmesa->sarea->texList[heap];
+   int visited = 0;
+   int cur = MACH64_NR_TEX_REGIONS;
+   int n;
+
+   fprintf( stderr, "\nGlobal LRU, heap %d list %p:\n", heap, list );
+
+   /* Follow the links, printing the sentinel first. */
+   while ( visited < MACH64_NR_TEX_REGIONS ) {
+      fprintf( stderr, "list[%d] age %d in_use %d next %d prev %d\n",
+	       cur, list[cur].age, list[cur].in_use,
+	       list[cur].next, list[cur].prev );
+      cur = list[cur].next;
+      if ( cur == MACH64_NR_TEX_REGIONS )
+	 break;
+      visited++;
+   }
+
+   /* Never got back to the sentinel: the links are corrupt. */
+   if ( cur != MACH64_NR_TEX_REGIONS ) {
+      fprintf( stderr, "Loop detected in global LRU\n" );
+      for ( n = 0 ; n < MACH64_NR_TEX_REGIONS ; n++ ) {
+	 fprintf( stderr, "list[%d] age %d in_use %d next %d prev %d\n",
+		  n, list[n].age, list[n].in_use, list[n].next, list[n].prev );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+/* Reset the global texture LRU.
+ *
+ * Rebuilds the shared region list for `heap' as a circular doubly
+ * linked list in index order, marks every region unused with age 0,
+ * and resets the shared texture age for the heap to 0.
+ */
+/* NOTE: This function is only called while holding the hardware lock */
+static void mach64ResetGlobalLRU( mach64ContextPtr mmesa, int heap )
+{
+   drmTextureRegion *list = mmesa->sarea->texList[heap];
+   int sz = 1 << mmesa->mach64Screen->logTexGranularity[heap];
+   int i;
+
+   /* (Re)initialize the global circular LRU list.  The last element in
+    * the array (MACH64_NR_TEX_REGIONS) is the sentinel.  Keeping it at
+    * the end of the array allows it to be addressed rationally when
+    * looking up objects at a particular location in texture memory.
+    */
+   /* Link every region that fits entirely inside the heap. */
+   for ( i = 0 ; (i+1) * sz <= mmesa->mach64Screen->texSize[heap] ; i++ ) {
+      list[i].prev = i-1;
+      list[i].next = i+1;
+      list[i].age = 0;
+      list[i].in_use = 0;
+   }
+
+   /* Step back to the last region initialized above, then splice both
+    * ends of the chain onto the sentinel.
+    * NOTE(review): if the heap holds a single region (i == 0 here) the
+    * list[i].prev store below overwrites the list[0].prev store just
+    * before it, and if it holds none i is -1 - presumably neither case
+    * occurs in practice; confirm.
+    */
+   i--;
+   list[0].prev = MACH64_NR_TEX_REGIONS;
+   list[i].prev = i-1;
+   list[i].next = MACH64_NR_TEX_REGIONS;
+   list[MACH64_NR_TEX_REGIONS].prev = i;
+   list[MACH64_NR_TEX_REGIONS].next = 0;
+   mmesa->sarea->texAge[heap] = 0;
+}
+
+/* Update the local and global texture LRUs.
+ *
+ * Marks texture `t' as the most recently used: bumps the shared texture
+ * age of its heap, moves `t' to the head of this context's local list,
+ * and for every shared region covered by t->memBlock records this
+ * context as owner, stamps the new age and moves the region to the head
+ * of the global list.
+ *
+ * If `t' has no heap block a message is printed and nothing is changed.
+ */
+/* NOTE: This function is only called while holding the hardware lock */
+void mach64UpdateTexLRU( mach64ContextPtr mmesa,
+			 mach64TexObjPtr t )
+{
+   int heap = t->heap;
+   drmTextureRegion *list = mmesa->sarea->texList[heap];
+   int log2sz = mmesa->mach64Screen->logTexGranularity[heap];
+   int start, end;
+   int i;
+
+   /* This check has to come before anything reads t->memBlock;
+    * computing the region range first would dereference a NULL block.
+    */
+   if ( !t->memBlock ) {
+      fprintf( stderr, "no memblock\n\n" );
+      return;
+   }
+
+   /* First and last shared region touched by the block. */
+   start = t->memBlock->ofs >> log2sz;
+   end = (t->memBlock->ofs + t->memBlock->size - 1) >> log2sz;
+
+   mmesa->lastTexAge[heap] = ++mmesa->sarea->texAge[heap];
+
+   /* Update our local LRU */
+   move_to_head( &mmesa->TexObjList[heap], t );
+
+   /* Update the global LRU */
+   for ( i = start ; i <= end ; i++ ) {
+      list[i].in_use = mmesa->hHWContext;
+      list[i].age = mmesa->lastTexAge[heap];
+
+#if 0
+      /* if this is the last region, it's not in the list */
+      if ( !(i*(1<<log2sz) > mmesa->mach64Screen->texSize[heap] ) ) {
+#endif
+	 /* remove_from_list(i) */
+	 list[(GLuint)list[i].next].prev = list[i].prev;
+	 list[(GLuint)list[i].prev].next = list[i].next;
+#if 0
+      }
+#endif
+
+      /* insert_at_head(list, i) */
+      list[i].prev = MACH64_NR_TEX_REGIONS;
+      list[i].next = list[MACH64_NR_TEX_REGIONS].next;
+      list[(GLuint)list[MACH64_NR_TEX_REGIONS].next].prev = i;
+      list[MACH64_NR_TEX_REGIONS].next = i;
+   }
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_LRU ) {
+      mach64PrintGlobalLRU( mmesa, t->heap );
+      mach64PrintLocalLRU( mmesa, t->heap );
+   }
+}
+
+/* Update our notion of what textures have been changed since we last
+ * held the lock.  This pertains to both our local textures and the
+ * textures belonging to other clients.  Keep track of other client's
+ * textures by pushing a placeholder texture onto the LRU list -- these
+ * are denoted by (tObj == NULL).
+ *
+ * Every local texture in `heap' overlapping [offset, offset + size) is
+ * evicted: bound textures are swapped out (kept alive for re-upload),
+ * unbound ones are destroyed.  `in_use' is the owner recorded for the
+ * region in the shared area; when it names another context, a
+ * placeholder covering the range is allocated so the local allocator
+ * does not hand that memory out.
+ */
+/* NOTE: This function is only called while holding the hardware lock */
+static void mach64TexturesGone( mach64ContextPtr mmesa, int heap,
+				int offset, int size, int in_use )
+{
+   mach64TexObjPtr t, tmp;
+
+   /* foreach_s is used so entries can be removed during the walk. */
+   foreach_s ( t, tmp, &mmesa->TexObjList[heap] ) {
+      if ( t->memBlock->ofs >= offset + size ||
+	   t->memBlock->ofs + t->memBlock->size <= offset )
+	 continue;
+
+      /* It overlaps - kick it out.  Need to hold onto the currently
+       * bound objects, however.
+       */
+      if ( t->bound ) {
+	 mach64SwapOutTexObj( mmesa, t );
+      } else {
+	 mach64DestroyTexObj( mmesa, t );
+      }
+   }
+
+   if ( in_use > 0 && in_use != mmesa->hHWContext ) {
+      t = (mach64TexObjPtr) CALLOC( sizeof(*t) );
+      if (!t) return;
+
+      t->memBlock = mmAllocMem( mmesa->texHeap[heap], size, 0, offset );
+      if ( !t->memBlock ) {
+	 fprintf( stderr, "Couldn't alloc placeholder sz %x ofs %x\n",
+		  (int)size, (int)offset );
+	 mmDumpMemInfo( mmesa->texHeap[heap] );
+	 /* The placeholder was never linked into a list, so nothing
+	  * else refers to it; free it here or it is leaked.
+	  */
+	 FREE( t );
+	 return;
+      }
+      insert_at_head( &mmesa->TexObjList[heap], t );
+   }
+}
+
+/* Bring this context's view of texture heap `heap' back in sync with
+ * the shared area.
+ *
+ * When the shared texture age differs from the age this context last
+ * recorded, the global LRU is walked from its tail towards its head and
+ * every region stamped with a newer age is handed to
+ * mach64TexturesGone().  If the walk runs into a region beyond the end
+ * of the heap, or does not terminate within MACH64_NR_TEX_REGIONS
+ * steps, the shared list is treated as invalid: all local textures in
+ * the heap are evicted and the global LRU is rebuilt.  Finally the
+ * context and both texture images are flagged for upload and the
+ * recorded age is refreshed.
+ */
+void mach64AgeTextures( mach64ContextPtr mmesa, int heap )
+{
+   ATISAREAPrivPtr sarea = mmesa->sarea;
+   drmTextureRegion *regions;
+   int granularity;
+   int visited;
+   int idx;
+
+   /* Nothing has changed since we last looked. */
+   if ( sarea->texAge[heap] == mmesa->lastTexAge[heap] )
+      return;
+
+   regions = sarea->texList[heap];
+   granularity = 1 << mmesa->mach64Screen->logTexGranularity[heap];
+   visited = 0;
+
+   /* Have to go right round from the back to ensure stuff ends up
+    * LRU in our local list...  Fix with a cursor pointer.
+    */
+   idx = regions[MACH64_NR_TEX_REGIONS].prev;
+   while ( idx != MACH64_NR_TEX_REGIONS && visited < MACH64_NR_TEX_REGIONS ) {
+      /* If switching texturing schemes, then the SAREA might not
+       * have been properly cleared, so we need to reset the
+       * global texture LRU.
+       */
+      if ( idx * granularity > mmesa->mach64Screen->texSize[heap] ) {
+	 visited = MACH64_NR_TEX_REGIONS;
+	 break;
+      }
+
+      if ( regions[idx].age > mmesa->lastTexAge[heap] ) {
+	 mach64TexturesGone( mmesa, heap, idx * granularity, granularity,
+			     regions[idx].in_use );
+      }
+
+      idx = regions[idx].prev;
+      visited++;
+   }
+
+   /* Either flagged above or the walk never reached the sentinel:
+    * throw everything away and start with a clean global LRU.
+    */
+   if ( visited == MACH64_NR_TEX_REGIONS ) {
+      mach64TexturesGone( mmesa, heap, 0,
+			  mmesa->mach64Screen->texSize[heap], 0 );
+      mach64ResetGlobalLRU( mmesa, heap );
+   }
+
+   /* Debug dump, compiled in but disabled. */
+   if ( 0 ) {
+      mach64PrintGlobalLRU( mmesa, heap );
+      mach64PrintLocalLRU( mmesa, heap );
+   }
+
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+		    MACH64_UPLOAD_TEX0IMAGE |
+		    MACH64_UPLOAD_TEX1IMAGE);
+   mmesa->lastTexAge[heap] = sarea->texAge[heap];
+}
+
+/* Upload mipmap level `level' of texture `t' into AGP texture memory.
+ *
+ * The image data is copied with memcpy() straight into the mapped AGP
+ * texture area at the offset of t->memBlock.  The x/y/width/height
+ * arguments are currently overridden so the whole image is always
+ * copied (see the FIXME below).  Returns silently if `level' is out of
+ * range or the texture has no image at that level.
+ */
+static void mach64UploadAGPSubImage( mach64ContextPtr mmesa,
+				     mach64TexObjPtr t, int level,
+				     int x, int y, int width, int height )
+{
+   mach64ScreenRec *mach64Screen = mmesa->mach64Screen;
+   struct gl_texture_image *image;
+   int texelsPerDword = 0;
+   int dwords;
+
+   /* Ensure we have a valid texture to upload.  Valid levels run from
+    * 0 to MaxTextureLevels - 1.
+    */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+     return;
+
+   /* Image[] is indexed by face first, then level; like the rest of
+    * this driver, use face 0.
+    */
+   image = t->tObj->Image[0][level];
+   if ( !image )
+      return;
+
+   switch ( image->TexFormat->TexelBytes ) {
+   case 1: texelsPerDword = 4; break;
+   case 2: texelsPerDword = 2; break;
+   case 4: texelsPerDword = 1; break;
+   default:
+      /* Anything else would leave texelsPerDword at 0 and divide by
+       * zero below.
+       */
+      fprintf( stderr, "%s: unsupported texel size %d\n",
+	       __FUNCTION__, (int) image->TexFormat->TexelBytes );
+      return;
+   }
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   dwords = width * height / texelsPerDword;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_agpTextureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)t->offset, (GLint)width, dwords );
+      mmDumpMemInfo( mmesa->texHeap[t->heap] );
+   }
+
+   assert(image->Data);
+
+   {
+      /* Destination is the block's offset inside the AGP mapping. */
+      CARD32 *dst = (CARD32 *)((char *)mach64Screen->agpTextures.map + t->memBlock->ofs);
+      const GLubyte *src = (const GLubyte *) image->Data +
+	 (y * image->Width + x) * image->TexFormat->TexelBytes;
+      const GLuint bytes = width * height * image->TexFormat->TexelBytes;
+      memcpy(dst, src, bytes);
+   }
+
+}
+
+/* Upload mipmap level `level' of texture `t' into local (card) texture
+ * memory using host-data blits.
+ *
+ * The image is copied into DMA buffers obtained from
+ * mach64GetBufferLocked() and sent with mach64FireBlitLocked(); images
+ * too large for one buffer are split into bands of rows.  The
+ * x/y/width/height arguments are currently overridden so the whole
+ * image is always uploaded (see the FIXME below).  Returns silently if
+ * `level' is out of range or the texture has no image at that level.
+ * On completion the context state is flagged for re-emission.
+ */
+static void mach64UploadLocalSubImage( mach64ContextPtr mmesa,
+				  mach64TexObjPtr t, int level,
+				  int x, int y, int width, int height )
+{
+   struct gl_texture_image *image;
+   int texelsPerDword = 0;
+   int imageWidth, imageHeight;
+   int remaining, rows;
+   int format, dwords;
+   const int maxdwords = (MACH64_BUFFER_MAX_DWORDS - (MACH64_HOSTDATA_BLIT_OFFSET / 4));
+   CARD32 pitch, offset;
+
+   /* Ensure we have a valid texture to upload.  Valid levels run from
+    * 0 to MaxTextureLevels - 1.
+    */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+      return;
+
+   /* Image[] is indexed by face first, then level; like the rest of
+    * this driver, use face 0.
+    */
+   image = t->tObj->Image[0][level];
+   if ( !image )
+      return;
+
+   switch ( image->TexFormat->TexelBytes ) {
+   case 1: texelsPerDword = 4; break;
+   case 2: texelsPerDword = 2; break;
+   case 4: texelsPerDword = 1; break;
+   default:
+      /* Anything else would leave texelsPerDword at 0 and divide by
+       * zero below.
+       */
+      fprintf( stderr, "%s: unsupported texel size %d\n",
+	       __FUNCTION__, (int) image->TexFormat->TexelBytes );
+      return;
+   }
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   imageWidth  = image->Width;
+   imageHeight = image->Height;
+
+   format = t->textureFormat;
+
+   /* The texel upload routines have a minimum width, so force the size
+    * if needed.
+    */
+   if ( imageWidth < texelsPerDword ) {
+      int factor;
+
+      factor = texelsPerDword / imageWidth;
+      imageWidth = texelsPerDword;
+      imageHeight /= factor;
+      if ( imageHeight == 0 ) {
+	 /* In this case, the texel converter will actually walk a
+	  * texel or two off the end of the image, but normal malloc
+	  * alignment should prevent it from ever causing a fault.
+	  */
+	 imageHeight = 1;
+      }
+   }
+
+   /* We can't upload to a pitch less than 64 texels so we will need to
+    * linearly upload all modified rows for textures smaller than this.
+    * This makes the x/y/width/height different for the blitter and the
+    * texture walker.
+    */
+   if ( imageWidth >= 64 ) {
+      /* The texture walker and the blitter look identical */
+      pitch = imageWidth >> 3;
+   } else {
+      int factor;
+      int y2;
+      int start, end;
+
+      start = (y * imageWidth) & ~63;
+      end = (y + height) * imageWidth;
+
+      if ( end - start < 64 ) {
+	 /* Handle the case where the total number of texels
+	  * uploaded is < 64.
+	  */
+	 x = 0;
+	 y = start / 64;
+	 width = end - start;
+	 height = 1;
+      } else {
+	 /* Upload some number of full 64 texel blit rows */
+	 factor = 64 / imageWidth;
+
+	 y2 = y + height - 1;
+	 y /= factor;
+	 y2 /= factor;
+
+	 x = 0;
+	 width = 64;
+	 height = y2 - y + 1;
+      }
+
+      /* Fixed pitch of 64 */
+      pitch = 8;
+   }
+
+   dwords = width * height / texelsPerDword;
+   offset = t->offset;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_textureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)offset, (GLint)width, dwords );
+      mmDumpMemInfo( mmesa->texHeap[t->heap] );
+   }
+
+   /* Subdivide the texture if required (account for the registers added by the drm) */
+   if ( dwords <= maxdwords ) {
+      rows = height;
+   } else {
+      rows = (maxdwords * texelsPerDword) / (2 * width);
+   }
+
+   /* One blit per band of at most `rows' rows. */
+   for ( remaining = height ;
+	 remaining > 0 ;
+	 remaining -= rows, y += rows )
+   {
+       drmBufPtr buffer;
+       CARD32 *dst;
+
+       height = MIN2(remaining, rows);
+
+       /* Grab the dma buffer for the texture blit */
+       buffer = mach64GetBufferLocked( mmesa );
+
+       /* Texel data starts after the space reserved at the front of
+	* the buffer.
+	*/
+       dst = (CARD32 *)((char *)buffer->address + MACH64_HOSTDATA_BLIT_OFFSET);
+
+       assert(image->Data);
+
+       {
+          const GLubyte *src = (const GLubyte *) image->Data +
+             (y * image->Width + x) * image->TexFormat->TexelBytes;
+          const GLuint bytes = width * height * image->TexFormat->TexelBytes;
+          memcpy(dst, src, bytes);
+       }
+
+       mach64FireBlitLocked( mmesa, buffer, offset, pitch, format,
+			     x, y, width, height );
+
+   }
+
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC;
+}
+
+
+/* Upload the texture images associated with texture `t'.  This might
+ * require removing our own and/or other client's texture objects to
+ * make room for these images.
+ *
+ * If `t' has no heap block yet, one is allocated: local (card) memory
+ * is preferred, AGP memory is tried next on non-PCI cards, and after
+ * that the least recently used textures of the chosen heap are swapped
+ * out one at a time until the allocation succeeds.  The function gives
+ * up (returning with t->memBlock still NULL) when the LRU texture is
+ * bound or, on PCI cards, when the local heap is exhausted; it exits
+ * the process when the AGP heap is exhausted as well.
+ *
+ * Once memory is available the LRUs are updated and, if `t' is dirty,
+ * its base level image is uploaded.
+ */
+void mach64UploadTexImages( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+   GLint heap;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p )\n",
+	       __FUNCTION__, mmesa->glCtx, t );
+   }
+
+   assert(t);
+   assert(t->tObj);
+
+   /* Choose the heap appropriately */
+   heap = MACH64_CARD_HEAP;
+
+   if ( !mmesa->mach64Screen->IsPCI &&
+	t->size > mmesa->mach64Screen->texSize[heap] ) {
+      heap = MACH64_AGP_HEAP;
+   }
+
+   /* Do we need to eject LRU texture objects? */
+   if ( !t->memBlock ) {
+      t->heap = heap;
+
+      /* Allocate a memory block on a 64-byte boundary */
+      t->memBlock = mmAllocMem( mmesa->texHeap[heap], t->size, 6, 0 );
+
+      /* Try AGP before kicking anything out of local mem */
+      if ( !mmesa->mach64Screen->IsPCI && !t->memBlock && heap == MACH64_CARD_HEAP ) {
+	 t->memBlock = mmAllocMem( mmesa->texHeap[MACH64_AGP_HEAP],
+				   t->size, 6, 0 );
+
+	 if ( t->memBlock )
+	    heap = t->heap = MACH64_AGP_HEAP;
+      }
+
+      /* Kick out textures until the requested texture fits */
+      while ( !t->memBlock ) {
+	 /* Test for an empty list first: when the list is empty `prev'
+	  * is the list head itself, not a real texture, so its `bound'
+	  * field must not be consulted.
+	  */
+	 if ( mmesa->TexObjList[heap].prev == &mmesa->TexObjList[heap] ) {
+	    if ( mmesa->mach64Screen->IsPCI ) {
+	       fprintf( stderr, "%s: upload texture failure on "
+			"local texture heaps, sz=%d\n", __FUNCTION__,
+			t->size );
+	       return;
+	    } else if ( heap == MACH64_CARD_HEAP ) {
+	       /* Local memory is exhausted; start evicting from AGP. */
+	       heap = t->heap = MACH64_AGP_HEAP;
+	       continue;
+	    } else {
+	      int i;
+	       fprintf( stderr, "%s: upload texture failure on "
+			"%sAGP texture heaps, sz=%d\n", __FUNCTION__,
+			mmesa->firstTexHeap == MACH64_CARD_HEAP ? "both local and " : "",
+			t->size );
+	       for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+		  mach64PrintLocalLRU( mmesa, i );
+	          mmDumpMemInfo( mmesa->texHeap[i] );
+	       }
+	       exit(-1);
+	       return;
+	    }
+	 }
+
+	 /* The least recently used texture is still bound, so it cannot
+	  * be evicted.
+	  */
+	 if ( mmesa->TexObjList[heap].prev->bound ) {
+	    fprintf( stderr,
+		     "mach64UploadTexImages: ran into bound texture\n" );
+	    return;
+	 }
+
+	 mach64SwapOutTexObj( mmesa, mmesa->TexObjList[heap].prev );
+
+	 t->memBlock = mmAllocMem( mmesa->texHeap[heap], t->size, 6, 0 );
+      }
+
+      /* Set the base offset of the texture image */
+      t->offset = mmesa->mach64Screen->texOffset[heap] + t->memBlock->ofs;
+
+      /* Force loading the new state into the hardware */
+      mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		       MACH64_UPLOAD_TEXTURE);
+   }
+
+   /* Let the world know we've used this memory recently */
+   mach64UpdateTexLRU( mmesa, t );
+
+   /* Upload any images that are new */
+   if ( t->dirty ) {
+      if (t->heap == MACH64_AGP_HEAP) {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t, t->tObj->BaseLevel, 0, 0,
+				  t->tObj->Image[0][t->tObj->BaseLevel]->Width,
+				  t->tObj->Image[0][t->tObj->BaseLevel]->Height );
+      } else {
+	 mach64UploadLocalSubImage( mmesa, t, t->tObj->BaseLevel, 0, 0,
+				    t->tObj->Image[0][t->tObj->BaseLevel]->Width,
+				    t->tObj->Image[0][t->tObj->BaseLevel]->Height );
+      }
+
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+   }
+
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+
+   t->dirty = 0;
+}
+
+/* The mach64 needs to have both primary and secondary textures in either
+ * local or AGP memory, so we need a "buddy system" to make sure that allocation
+ * succeeds or fails for both textures.
+ * FIXME: This needs to be optimized better.
+ *
+ * If either texture lacks a heap block, or the two live in different
+ * heaps, both are swapped out and reallocated together: local (card)
+ * memory first, then AGP on non-PCI cards, then by swapping out the
+ * least recently used textures of the chosen heap until both fit.  The
+ * function gives up when the LRU texture is bound or, on PCI cards,
+ * when the local heap is exhausted; it exits the process when the AGP
+ * heap is exhausted as well.  Afterwards the LRUs are updated and the
+ * base level image of each dirty texture is uploaded.
+ */
+void mach64UploadMultiTexImages( mach64ContextPtr mmesa,
+				 mach64TexObjPtr t0,
+				 mach64TexObjPtr t1 )
+{
+   GLint heap;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p %p )\n",
+	       __FUNCTION__, mmesa->glCtx, t0, t1 );
+   }
+
+   assert(t0 && t1);
+   assert(t0->tObj && t1->tObj);
+
+   /* Choose the heap appropriately */
+   heap = MACH64_CARD_HEAP;
+
+   if ( !mmesa->mach64Screen->IsPCI &&
+	((t0->size + t1->size) > mmesa->mach64Screen->texSize[heap]) ) {
+      heap = MACH64_AGP_HEAP;
+   }
+
+   /* Do we need to eject LRU texture objects? */
+   if ( !t0->memBlock || !t1->memBlock || t0->heap != t1->heap ) {
+      /* FIXME: starting from scratch for now to keep it simple */
+      if ( t0->memBlock ) {
+	 mach64SwapOutTexObj( mmesa, t0 );
+      }
+      if ( t1->memBlock ) {
+	 mach64SwapOutTexObj( mmesa, t1 );
+      }
+      t0->heap = t1->heap = heap;
+      /* Allocate a memory block on a 64-byte boundary */
+      t0->memBlock = mmAllocMem( mmesa->texHeap[heap], t0->size, 6, 0 );
+      if ( t0->memBlock ) {
+	 t1->memBlock = mmAllocMem( mmesa->texHeap[heap], t1->size, 6, 0 );
+	 if ( !t1->memBlock ) {
+	    /* Both or neither: undo the first allocation. */
+	    mmFreeMem( t0->memBlock );
+	    t0->memBlock = NULL;
+	 }
+      }
+      /* Try AGP before kicking anything out of local mem.  As in the
+       * single-texture path, the AGP heap is only touched on non-PCI
+       * cards.
+       */
+      if ( !mmesa->mach64Screen->IsPCI &&
+	   (!t0->memBlock || !t1->memBlock) && heap == MACH64_CARD_HEAP ) {
+	 t0->memBlock = mmAllocMem( mmesa->texHeap[MACH64_AGP_HEAP], t0->size, 6, 0 );
+	 if ( t0->memBlock ) {
+	    t1->memBlock = mmAllocMem( mmesa->texHeap[MACH64_AGP_HEAP], t1->size, 6, 0 );
+	    if ( !t1->memBlock ) {
+	       mmFreeMem( t0->memBlock );
+	       t0->memBlock = NULL;
+	    }
+	 }
+
+	 if ( t0->memBlock && t1->memBlock )
+	    heap = t0->heap = t1->heap = MACH64_AGP_HEAP;
+      }
+
+      /* Kick out textures until the requested texture fits */
+      while ( !t0->memBlock || !t1->memBlock ) {
+	 /* Test for an empty list first: when the list is empty `prev'
+	  * is the list head itself, not a real texture, so its `bound'
+	  * field must not be consulted.
+	  */
+	 if ( mmesa->TexObjList[heap].prev == &mmesa->TexObjList[heap] ) {
+	    if ( mmesa->mach64Screen->IsPCI ) {
+	       fprintf( stderr, "%s: upload texture failure on local "
+			"texture heaps, tex0 sz=%d  tex1 sz=%d\n", __FUNCTION__, 
+			t0->size, t1->size );
+	       return;
+	    } else if ( heap == MACH64_CARD_HEAP ) {
+	       /* If only one allocation succeeded, start over again in AGP */
+	       if (t0->memBlock) {
+		  mmFreeMem( t0->memBlock );
+	          t0->memBlock = NULL;
+	       }
+	       if (t1->memBlock) {
+		  mmFreeMem( t1->memBlock );
+	          t1->memBlock = NULL;
+	       }
+	       heap = t0->heap = t1->heap = MACH64_AGP_HEAP;
+	       continue;
+	    } else {
+	      int i;
+	       fprintf( stderr, "%s: upload texture failure on %s"
+			"AGP texture heaps, tex0 sz=%d  tex1 sz=%d\n", __FUNCTION__,
+			mmesa->firstTexHeap == MACH64_CARD_HEAP ? "both local and " : "",
+			t0->size, t1->size );
+	       for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+		  mach64PrintLocalLRU( mmesa, i );
+	          mmDumpMemInfo( mmesa->texHeap[i] );
+	       }
+	       exit(-1);
+	       return;
+	    }
+	 }
+
+	 /* The least recently used texture is still bound, so it cannot
+	  * be evicted.
+	  */
+	 if ( mmesa->TexObjList[heap].prev->bound ) {
+	    fprintf( stderr,
+		     "%s: ran into bound texture\n", __FUNCTION__ );
+	    return;
+	 }
+
+	 mach64SwapOutTexObj( mmesa, mmesa->TexObjList[heap].prev );
+
+	 /* Retry whichever allocation is still missing. */
+	 if (!t0->memBlock)
+	    t0->memBlock = mmAllocMem( mmesa->texHeap[heap], t0->size, 6, 0 );
+	 if (!t1->memBlock)
+	    t1->memBlock = mmAllocMem( mmesa->texHeap[heap], t1->size, 6, 0 );
+      }
+
+      /* Set the base offset of the texture image */
+      t0->offset = mmesa->mach64Screen->texOffset[heap] + t0->memBlock->ofs;
+      t1->offset = mmesa->mach64Screen->texOffset[heap] + t1->memBlock->ofs;
+
+      /* Force loading the new state into the hardware */
+      mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		       MACH64_UPLOAD_TEXTURE);
+   }
+
+   /* Let the world know we've used this memory recently */
+   mach64UpdateTexLRU( mmesa, t0 );
+   mach64UpdateTexLRU( mmesa, t1 );
+
+   /* Upload any images that are new */
+   if ( t0->dirty ) {
+      if (t0->heap == MACH64_AGP_HEAP) {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t0, t0->tObj->BaseLevel, 0, 0,
+				    t0->tObj->Image[0][t0->tObj->BaseLevel]->Width,
+				    t0->tObj->Image[0][t0->tObj->BaseLevel]->Height );
+      } else {
+	 mach64UploadLocalSubImage( mmesa, t0, t0->tObj->BaseLevel, 0, 0,
+				    t0->tObj->Image[0][t0->tObj->BaseLevel]->Width,
+				    t0->tObj->Image[0][t0->tObj->BaseLevel]->Height );
+      }
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+   }
+   if ( t1->dirty ) {
+      if (t1->heap == MACH64_AGP_HEAP) {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t1, t1->tObj->BaseLevel, 0, 0,
+			       t1->tObj->Image[0][t1->tObj->BaseLevel]->Width,
+			       t1->tObj->Image[0][t1->tObj->BaseLevel]->Height );
+      } else {
+	 mach64UploadLocalSubImage( mmesa, t1, t1->tObj->BaseLevel, 0, 0,
+			       t1->tObj->Image[0][t1->tObj->BaseLevel]->Width,
+			       t1->tObj->Image[0][t1->tObj->BaseLevel]->Height );
+      }
+
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+   }
+
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+
+   t0->dirty = 0;
+   t1->dirty = 0;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_texstate.c b/src/mesa/drivers/dri/mach64/mach64_texstate.c
new file mode 100644
index 0000000000..ada4b056f5
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_texstate.c
@@ -0,0 +1,558 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "texformat.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_state.h"
+#include "mach64_vb.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+/*
+ * Fill in the driver-private texture object attached to tObj from the
+ * texture's base-level image: the mach64 datatype code, whether the base
+ * format is one of the alpha-carrying formats, the image size in bytes
+ * rounded up to a multiple of 32, and the log2 dimensions.
+ *
+ * Only the base level is described.  The mipmap level selection in the
+ * "#if 0" section is disabled reference code (it still uses R128_* names)
+ * and is kept as-is.
+ */
+static void mach64SetTexImages( mach64ContextPtr mmesa,
+                              const struct gl_texture_object *tObj )
+{
+   mach64TexObjPtr t = (mach64TexObjPtr) tObj->DriverData;
+   struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+#if 0
+   int log2Pitch, log2Height, log2Size, log2MinSize;
+   int i;
+   GLint firstLevel, lastLevel;
+#endif
+   int totalSize;
+
+   assert(t);
+   assert(baseImage);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, tObj );
+   }
+
+   /* Translate the Mesa texel format into the mach64 datatype code.  An
+    * unrecognised format is reported and t->textureFormat is left untouched.
+    */
+   switch ( baseImage->TexFormat->MesaFormat ) {
+   case MESA_FORMAT_ARGB8888:  t->textureFormat = MACH64_DATATYPE_ARGB8888; break;
+   case MESA_FORMAT_ARGB4444:  t->textureFormat = MACH64_DATATYPE_ARGB4444; break;
+   case MESA_FORMAT_RGB565:    t->textureFormat = MACH64_DATATYPE_RGB565;   break;
+   case MESA_FORMAT_ARGB1555:  t->textureFormat = MACH64_DATATYPE_ARGB1555; break;
+   case MESA_FORMAT_RGB332:    t->textureFormat = MACH64_DATATYPE_RGB332;   break;
+   case MESA_FORMAT_RGB888:    t->textureFormat = MACH64_DATATYPE_RGB8;     break;
+   case MESA_FORMAT_CI8:       t->textureFormat = MACH64_DATATYPE_CI8;      break;
+   case MESA_FORMAT_YCBCR:     t->textureFormat = MACH64_DATATYPE_YVYU422;  break;
+   case MESA_FORMAT_YCBCR_REV: t->textureFormat = MACH64_DATATYPE_VYUY422;  break;
+   default:
+      _mesa_problem(mmesa->glCtx, "Bad texture format in %s", __FUNCTION__);
+      break;
+   }
+
+#if 0
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    */
+   firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
+   firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+   lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
+   lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+   lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+   lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+   lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+
+   log2Pitch = tObj->Image[firstLevel]->WidthLog2;
+   log2Height = tObj->Image[firstLevel]->HeightLog2;
+   log2Size = MAX2(log2Pitch, log2Height);
+   log2MinSize = log2Size;
+
+   t->dirty = 0;
+   totalSize = 0;
+   for ( i = firstLevel; i <= lastLevel; i++ ) {
+      const struct gl_texture_image *texImage;
+
+      texImage = tObj->Image[i];
+      if ( !texImage || !texImage->Data ) {
+         lastLevel = i - 1;
+	 break;
+      }
+
+      log2MinSize = texImage->MaxLog2;
+
+      t->image[i - firstLevel].offset = totalSize;
+      t->image[i - firstLevel].width  = tObj->Image[i]->Width;
+      t->image[i - firstLevel].height = tObj->Image[i]->Height;
+
+      t->dirty |= (1 << i);
+
+      totalSize += (tObj->Image[i]->Height *
+		    tObj->Image[i]->Width *
+		    tObj->Image[i]->TexFormat->TexelBytes);
+
+      /* Offsets must be 32-byte aligned for host data blits and tiling */
+      totalSize = (totalSize + 31) & ~31;
+   }
+
+   t->totalSize = totalSize;
+   t->firstLevel = firstLevel;
+   t->lastLevel = lastLevel;
+
+   /* Set the texture format */
+   t->setup.tex_cntl &= ~(0xf << 16);
+   t->setup.tex_cntl |= t->textureFormat;
+
+   t->setup.tex_combine_cntl = 0x00000000;  /* XXX is this right? */
+
+   t->setup.tex_size_pitch = ((log2Pitch   << R128_TEX_PITCH_SHIFT) |
+			      (log2Size    << R128_TEX_SIZE_SHIFT) |
+			      (log2Height  << R128_TEX_HEIGHT_SHIFT) |
+			      (log2MinSize << R128_TEX_MIN_SIZE_SHIFT));
+
+   for ( i = 0 ; i < R128_MAX_TEXTURE_LEVELS ; i++ ) {
+      t->setup.tex_offset[i]  = 0x00000000;
+   }
+
+   if (firstLevel == lastLevel)
+      t->setup.tex_cntl |= R128_MIP_MAP_DISABLE;
+   else
+      t->setup.tex_cntl &= ~R128_MIP_MAP_DISABLE;
+
+#else
+   /* Only these three base formats are flagged as carrying alpha */
+   switch ( baseImage->Format ) {
+   case GL_RGBA:
+   case GL_ALPHA:
+   case GL_LUMINANCE_ALPHA:
+      t->hasAlpha = 1;
+      break;
+   default:
+      t->hasAlpha = 0;
+      break;
+   }
+
+   /* Byte size of the base level, rounded up to a multiple of 32 */
+   totalSize = baseImage->Width * baseImage->Height *
+               baseImage->TexFormat->TexelBytes;
+   t->size = (totalSize + 31) & ~31;
+
+   t->widthLog2  = baseImage->WidthLog2;
+   t->heightLog2 = baseImage->HeightLog2;
+   t->maxLog2    = baseImage->MaxLog2;
+
+#endif
+}
+
+/*
+ * Program the texture environment of hardware texture unit `unit' from the
+ * GL texture unit it is sourced from (mmesa->tmu_source[unit]).
+ *
+ * Unit 0 selects the texture lighting function held in the cached
+ * scale_3d_cntl value; any other unit selects the composite function held
+ * in the cached tex_cntl value.  Environment mode / base format
+ * combinations that are not handled in hardware raise
+ * MACH64_FALLBACK_TEXTURE.
+ *
+ * GL texture environment reference (f = fragment, t = texture,
+ * c = environment colour):
+ *
+ *                 REPLACE  MODULATE   DECAL              GL_BLEND
+ *
+ * ALPHA           C = Cf   C = Cf     undef              C = Cf
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * LUMINANCE       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = Af   A = Af                        A = Af
+ *
+ * LUMINANCE_ALPHA C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * INTENSITY       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = Af(1-At)+AcAt
+ *
+ * RGB             C = Ct   C = CfCt   C = Ct             C = Cf(1-Ct)+CcCt
+ *                 A = Af   A = Af     A = Af             A = Af
+ *
+ * RGBA            C = Ct   C = CfCt   C = Cf(1-At)+CtAt  C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt   A = Af             A = AfAt
+ */
+static void mach64UpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLint source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->Format;
+   GLuint s = mmesa->setup.scale_3d_cntl;
+   GLboolean needFallback = GL_FALSE;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d )\n",
+	       __FUNCTION__, ctx, unit );
+   }
+
+   if ( unit == 0 ) {
+      /* Lighting function to OR in at the end; every path that does not
+       * pick something else ends up with MODULATE.
+       */
+      GLuint lightFcn = MACH64_TEX_LIGHT_FCN_MODULATE;
+
+      s &= ~MACH64_TEX_LIGHT_FCN_MASK;
+
+      /* Set the texture environment state
+       * Need to verify these are working correctly, but the
+       * texenv Mesa demo seems to work.
+       */
+      switch ( texUnit->EnvMode ) {
+      case GL_REPLACE:
+         if ( format == GL_ALPHA ||
+              format == GL_LUMINANCE_ALPHA ||
+              format == GL_INTENSITY ) {
+            /* Not compliant - can't get At */
+            needFallback = GL_TRUE;
+         } else {
+            lightFcn = MACH64_TEX_LIGHT_FCN_REPLACE;
+         }
+         break;
+
+      case GL_MODULATE:
+         /* RGB and LUMINANCE should be compliant.  RGBA should fall back
+          * when blending is enabled for complete compliance, but does not.
+          */
+         if ( format == GL_ALPHA ||
+              format == GL_LUMINANCE_ALPHA ||
+              format == GL_INTENSITY ) {
+            needFallback = GL_TRUE;
+         }
+         break;
+
+      case GL_DECAL:
+         switch ( format ) {
+         case GL_RGBA:
+            lightFcn = MACH64_TEX_LIGHT_FCN_ALPHA_DECAL;
+            break;
+         case GL_RGB:
+            lightFcn = MACH64_TEX_LIGHT_FCN_REPLACE;
+            break;
+         case GL_ALPHA:
+         case GL_LUMINANCE_ALPHA:
+            /* undefined - also pass fragment alpha instead of texture alpha */
+            s &= ~MACH64_TEX_MAP_AEN;
+            /* fall through */
+         case GL_LUMINANCE:
+         case GL_INTENSITY:
+            /* undefined - disable texturing, pass fragment unmodified */
+            s |= MACH64_TEXTURE_DISABLE;
+            break;
+         default:
+            break;
+         }
+         break;
+
+      case GL_BLEND:   /* GL_BLEND not supported by RagePRO, use software */
+      case GL_ADD:
+      case GL_COMBINE:
+         needFallback = GL_TRUE;
+         break;
+
+      default:
+         break;
+      }
+
+      if ( needFallback ) {
+         FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      }
+      s |= lightFcn;
+
+      /* Only flag the register for upload when its value really changed */
+      if ( mmesa->setup.scale_3d_cntl != s ) {
+         mmesa->setup.scale_3d_cntl = s;
+         mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+      }
+
+   } else {
+      /* Whether to select the modulate composite function at the end */
+      GLboolean modulate = GL_TRUE;
+
+      /* blend = 0, modulate = 1 - initialize to blend */
+      mmesa->setup.tex_cntl &= ~MACH64_COMP_COMBINE_MODULATE;
+
+      /* Set the texture composite function for multitexturing */
+      switch ( texUnit->EnvMode ) {
+      case GL_MODULATE:
+         /* Should fallback when blending enabled for complete compliance */
+         break;
+
+      case GL_REPLACE:
+         /* anything but GL_ALPHA is not supported by RagePRO */
+         if ( format != GL_ALPHA ) {
+            needFallback = GL_TRUE;
+         }
+         break;
+
+      case GL_DECAL:
+         if ( format == GL_ALPHA ||
+              format == GL_LUMINANCE ||
+              format == GL_LUMINANCE_ALPHA ||
+              format == GL_INTENSITY ) {
+            /* undefined, disable compositing and pass fragment unmodified */
+            mmesa->setup.tex_cntl &= ~MACH64_TEXTURE_COMPOSITE;
+            modulate = GL_FALSE;
+         } else {
+            /* not supported by RagePRO */
+            needFallback = GL_TRUE;
+         }
+         break;
+
+      case GL_BLEND:   /* GL_BLEND not supported by RagePRO, use software */
+      case GL_ADD:
+      case GL_COMBINE:
+         needFallback = GL_TRUE;
+         break;
+
+      default:
+         break;
+      }
+
+      if ( needFallback ) {
+         FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      }
+      if ( modulate ) {
+         mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+      }
+   }
+}
+
+
+/*
+ * Set up hardware texture unit `unit' (0 or 1) from the texture object
+ * currently bound to GL unit mmesa->tmu_source[unit].
+ *
+ * For an enabled 1D/2D texture this refreshes the driver texture data when
+ * it is dirty, records the binding, and folds pixel format, filtering,
+ * alpha, clamping and log2 size into the cached register values.  A
+ * bordered texture, or any other enabled target, raises
+ * MACH64_FALLBACK_TEXTURE instead.  A disabled unit is left alone.
+ */
+static void mach64UpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const int glUnit = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[glUnit];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   mach64TexObjPtr t = tObj->DriverData;
+   /* Working copies of the cached registers, taken on entry */
+   GLuint pixWidth = mmesa->setup.dp_pix_width;
+   GLuint scaleCntl = mmesa->setup.scale_3d_cntl;
+   GLuint texCntl;
+
+   assert(unit == 0 || unit == 1);  /* only two tex units */
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d ) enabled=0x%x 0x%x\n",
+	       __FUNCTION__, ctx, unit, ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   if ( !(texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ) {
+      if ( texUnit->_ReallyEnabled ) {
+         /* 3D or cube map texture enabled - fallback */
+         FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      }
+      /* otherwise the texture unit is disabled: nothing to do */
+      return;
+   }
+
+   assert(t);  /* should have driver tex data by now */
+
+   /* Fallback if there's a texture border */
+   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      return;
+   }
+
+   /* Upload teximages */
+   if ( t->dirty ) {
+      mach64SetTexImages( mmesa, tObj );
+      mmesa->dirty |= (MACH64_UPLOAD_TEX0IMAGE << unit);
+   }
+
+   /* Bind to the given texture unit */
+   mmesa->CurrentTexObj[unit] = t;
+   t->bound |= (1 << unit);
+
+   if ( t->memBlock ) {
+      mach64UpdateTexLRU( mmesa, t );
+   }
+
+   /* register setup */
+   if ( unit == 0 ) {
+      pixWidth &= ~MACH64_SCALE_PIX_WIDTH_MASK;
+      pixWidth |= (t->textureFormat << 28);
+
+      scaleCntl &= ~(MACH64_TEXTURE_DISABLE |
+                     MACH64_TEX_CACHE_SPLIT |
+                     MACH64_TEX_BLEND_FCN_MASK |
+                     MACH64_TEX_MAP_AEN);
+
+      /* Minification: multitexturing overrides the texture's own filter */
+      if ( mmesa->multitex ) {
+         scaleCntl |= MACH64_TEX_BLEND_FCN_TRILINEAR | MACH64_TEX_CACHE_SPLIT;
+      } else if ( t->BilinearMin ) {
+         scaleCntl |= MACH64_TEX_BLEND_FCN_LINEAR;
+      } else {
+         scaleCntl |= MACH64_TEX_BLEND_FCN_NEAREST;
+      }
+
+      /* Magnification */
+      if ( t->BilinearMag ) {
+         scaleCntl |=  MACH64_BILINEAR_TEX_EN;
+      } else {
+         scaleCntl &= ~MACH64_BILINEAR_TEX_EN;
+      }
+
+      if ( t->hasAlpha ) {
+         scaleCntl |= MACH64_TEX_MAP_AEN;
+      }
+
+      texCntl = mmesa->setup.tex_cntl;
+      texCntl &= ~(MACH64_TEXTURE_CLAMP_S |
+                   MACH64_TEXTURE_CLAMP_T |
+                   MACH64_SECONDARY_STW);
+      if ( t->ClampS ) {
+         texCntl |= MACH64_TEXTURE_CLAMP_S;
+      }
+      if ( t->ClampT ) {
+         texCntl |= MACH64_TEXTURE_CLAMP_T;
+      }
+      mmesa->setup.tex_cntl = texCntl;
+
+      /* Primary texture size lives in the low half of tex_size_pitch */
+      mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 0) |
+                                      (t->maxLog2    << 4) |
+                                      (t->heightLog2 << 8));
+   } else {
+      /* Enable texture mapping mode */
+      scaleCntl &= ~MACH64_TEXTURE_DISABLE;
+
+      pixWidth &= ~MACH64_COMPOSITE_PIX_WIDTH_MASK;
+      pixWidth |= (t->textureFormat << 4);
+
+      texCntl = mmesa->setup.tex_cntl;
+      texCntl &= ~(MACH64_COMP_ALPHA |
+                   MACH64_SEC_TEX_CLAMP_S |
+                   MACH64_SEC_TEX_CLAMP_T);
+      texCntl |= (MACH64_TEXTURE_COMPOSITE |
+                  MACH64_SECONDARY_STW);
+
+      if ( t->BilinearMin ) {
+         texCntl |=  MACH64_COMP_BLEND_BILINEAR;
+      } else {
+         texCntl &= ~MACH64_COMP_BLEND_BILINEAR;
+      }
+      if ( t->BilinearMag ) {
+         texCntl |=  MACH64_COMP_FILTER_BILINEAR;
+      } else {
+         texCntl &= ~MACH64_COMP_FILTER_BILINEAR;
+      }
+
+      if ( t->hasAlpha ) {
+         texCntl |= MACH64_COMP_ALPHA;
+      }
+      if ( t->ClampS ) {
+         texCntl |= MACH64_SEC_TEX_CLAMP_S;
+      }
+      if ( t->ClampT ) {
+         texCntl |= MACH64_SEC_TEX_CLAMP_T;
+      }
+      mmesa->setup.tex_cntl = texCntl;
+
+      /* Secondary texture size lives in the high half of tex_size_pitch */
+      mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 16) |
+                                      (t->maxLog2    << 20) |
+                                      (t->heightLog2 << 24));
+   }
+
+   /* Write back the working copies, flagging only real changes */
+   if ( mmesa->setup.scale_3d_cntl != scaleCntl ) {
+      mmesa->setup.scale_3d_cntl = scaleCntl;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+
+   if ( mmesa->setup.dp_pix_width != pixWidth ) {
+      mmesa->setup.dp_pix_width = pixWidth;
+      mmesa->dirty |= MACH64_UPLOAD_DP_PIX_WIDTH;
+   }
+}
+
+
+/* Update the hardware texture state */
+/*
+ * Rebuild the cached hardware texture state from the GL texture state.
+ *
+ * Texturing fallbacks are cleared, both hardware units are unbound and
+ * texturing is disabled, then each enabled GL unit is set up again.  When
+ * only GL unit 1 is enabled it is routed to hardware unit 0.  The
+ * scale_3d_cntl and texture state are always flagged for upload.
+ */
+void mach64UpdateTextureState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int i;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p ) en=0x%x 0x%x\n",
+	       __FUNCTION__, ctx, ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   /* Clear any texturing fallbacks */
+   FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_FALSE );
+
+   /* Unbind any currently bound textures */
+   for ( i = 0 ; i < 2 ; i++ ) {
+      if ( mmesa->CurrentTexObj[i] ) {
+         mmesa->CurrentTexObj[i]->bound = 0;
+      }
+      mmesa->CurrentTexObj[i] = NULL;
+   }
+
+   /* Disable all texturing until it is known to be good */
+   mmesa->setup.scale_3d_cntl  |=  MACH64_TEXTURE_DISABLE;
+   mmesa->setup.scale_3d_cntl  &= ~MACH64_TEX_MAP_AEN;
+   mmesa->setup.tex_cntl       &= ~MACH64_TEXTURE_COMPOSITE;
+
+   mmesa->setup.tex_size_pitch = 0x00000000;
+
+   /* Default routing: hardware unit N reads GL unit N, single texture */
+   mmesa->tmu_source[0] = 0;
+   mmesa->tmu_source[1] = 1;
+   mmesa->multitex = 0;
+
+   switch ( ctx->Texture._EnabledUnits & 0x3 ) {
+   case 0x3:
+      /* units 0 and 1 enabled */
+      mmesa->multitex = 1;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      mach64UpdateTextureUnit( ctx, 1 );
+      mach64UpdateTextureEnv( ctx, 1 );
+      break;
+
+   case 0x2:
+      /* only unit 1 enabled: feed it through hardware unit 0 */
+      mmesa->tmu_source[0] = 1;
+      mmesa->tmu_source[1] = 0;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+
+   case 0x1:
+      /* only unit 0 enabled */
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+
+   default:
+      /* no texture unit enabled */
+      break;
+   }
+
+   mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		    MACH64_UPLOAD_TEXTURE);
+}
+
diff --git a/src/mesa/drivers/dri/mach64/mach64_tris.c b/src/mesa/drivers/dri/mach64/mach64_tris.c
new file mode 100644
index 0000000000..9692b3cae0
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tris.c
@@ -0,0 +1,1896 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "mach64_tris.h"
+#include "mach64_state.h"
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+
+/*
+ * Hardware primitive codes indexed by GL primitive type; the entries
+ * follow the GLenum order from GL_POINTS (first) to GL_POLYGON (last).
+ */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   MACH64_PRIM_POINTS,
+   MACH64_PRIM_LINES,
+   MACH64_PRIM_LINE_LOOP,
+   MACH64_PRIM_LINE_STRIP,
+   MACH64_PRIM_TRIANGLES,
+   MACH64_PRIM_TRIANGLE_STRIP,
+   MACH64_PRIM_TRIANGLE_FAN,
+   MACH64_PRIM_QUADS,
+   MACH64_PRIM_QUAD_STRIP,
+   MACH64_PRIM_POLYGON,
+};
+
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim );
+
+
+/* FIXME: Remove this when native template is finished. */
+#define MACH64_PRINT_BUFFER 0
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+/*
+ * DO_COPY_VERTEX( vb, vertsize, v, n, m )
+ *
+ * Append vertex v to the command stream at vb (vb is advanced past what is
+ * written) as sequential register writes aimed at hardware vertex slot n.
+ *
+ * The data copied is the last `vertsize' dwords of a 10-dword vertex
+ * ((CARD32 *)v + 10 - vertsize).  When vertsize > 7, the first three of
+ * those dwords are emitted under their own header word addressed at
+ * MACH64_VERTEX_n_SECONDARY_S.  The remaining dwords follow a second
+ * header whose register index is chosen so that the last of them lands on
+ * MACH64_VERTEX_n_X_Y.
+ *
+ * Every header word has the form (count << 16) | register index.
+ * NOTE(review): count looks like "number of dwords minus one" - confirm
+ * against the DMA packet format.
+ *
+ * m is added to the count of the final run: COPY_VERTEX passes 0, while
+ * COPY_VERTEX_OOA passes 1 so the caller can append one more dword (the
+ * one-over-area value) to the same run.
+ *
+ * The USE_X86_ASM variant copies with movsl / rep movsl (source pinned to
+ * esi, count to ecx) and stores the header words directly; the portable
+ * variant writes the headers through LE32_OUT and copies the data dwords
+ * unchanged.
+ */
+#if defined(USE_X86_ASM)
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )					\
+do {										\
+   register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize;	\
+   register int __s __asm__( "ecx" ) = vertsize;				\
+   if ( vertsize > 7 ) {							\
+      *vb++ = (2 << 16) | ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S );		\
+      __asm__ __volatile__( "movsl ; movsl ; movsl"				\
+			    : "=D" (vb), "=S" (__p)				\
+			    : "0" (vb), "1" (__p) );				\
+      __s -= 3;									\
+   }										\
+   *vb++ = ((__s - 1 + m) << 16) |						\
+   	   (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1) );			\
+   __asm__ __volatile__( "rep ; movsl"						\
+			 : "=%c" (__s), "=D" (vb), "=S" (__p)			\
+			 : "0" (__s), "1" (vb), "2" (__p) );			\
+} while (0)
+#else
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )				\
+do {									\
+   CARD32 *__p = (CARD32 *)v + 10 - vertsize;				\
+   int __s = vertsize;							\
+   if ( vertsize > 7 ) {						\
+      LE32_OUT( vb++, (2 << 16) |					\
+	    	      ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) );	\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      __s -= 3;								\
+   }									\
+   LE32_OUT( vb++, ((__s - 1 + m) << 16) |				\
+	           (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) );	\
+   while ( __s-- ) {							\
+      *vb++ = *__p++;							\
+   }									\
+} while (0)
+#endif
+
+/* Plain vertex copy, and the variant that leaves room for one trailing dword */
+#define COPY_VERTEX( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 0 )
+#define COPY_VERTEX_OOA( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 1 )
+
+
+/*
+ * Emit the quad (v0, v1, v2, v3) into the DMA buffer as two triangles.
+ *
+ * The first triangle is (v0, v1, v3), written to hardware vertex slots 1, 2
+ * and 3 and followed by its one-over-area value.  The second triangle only
+ * rewrites slot 1 with v2 (slots 2 and 3 are not written again) and is
+ * followed by its own one-over-area value.
+ * NOTE(review): presumably the one-over-area write is what starts
+ * rasterization of each triangle - confirm against the hardware docs.
+ *
+ * Culling is decided once, from the signed area of the first triangle and
+ * mmesa->backface_sign; a culled quad allocates and emits nothing.
+ *
+ * Two implementations are selected at compile time:
+ *  - MACH64_NATIVE_VTXFMT: vertices already hold hardware-format dwords and
+ *    are copied with COPY_VERTEX / COPY_VERTEX_OOA.
+ *  - otherwise: each register value is built here from the float vertex.
+ */
+static __inline void mach64_draw_quad( mach64ContextPtr mmesa,
+				       mach64VertexPtr v0,
+				       mach64VertexPtr v1,
+				       mach64VertexPtr v2,
+				       mach64VertexPtr v3 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Per vertex: vertsize data dwords plus one header dword (two when
+    * vertsize > 7, see DO_COPY_VERTEX).  Four vertices are emitted, plus
+    * two one-over-area dwords.
+    */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+      fprintf(stderr,"Vertex 4:\n");
+      mach64_print_vertex( ctx, v3 );
+   }
+   
+   /* Unpack the X_Y dword of each vertex: x is taken from the high 16
+    * bits, y from the low 16 bits, both as signed shorts.
+    */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v3->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+	   
+   /* Signed area term of triangle (v0, v1, v3) in fixed-point units */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+
+   /* Cull only when backface_sign is non-zero and the area has the
+    * opposite sign from it.
+    */
+   if ( (mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign )))) ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   
+   /* xx and yy each carry 2 fractional bits, so their product is scaled by
+    * 16.  NOTE(review): a == 0 (degenerate triangle) is not special-cased.
+    */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   /* First triangle: v0, v1, v3 into slots 1, 2, 3, then its 1/area */
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v3, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* Second triangle: v2 replaces slot 1; xx[1]/yy[1] (v1) and xx[2]/yy[2]
+    * (v3) are reused for the area.
+    */
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+	   
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   
+   ooa = 16.0 / a;
+   
+   COPY_VERTEX_OOA( vb, vertsize, v2, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* Exactly the allocated number of dwords must have been written */
+   assert( vb == vbchk );
+   
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Per vertex: one header dword, S/T when vertsize > 6, W/SPEC_ARGB when
+    * vertsize > 4, always Z/ARGB/X_Y, and four more dwords for the
+    * secondary texture when multitexturing.  Four vertices are emitted,
+    * plus two (header, value) pairs for the one-over-area writes.
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 4 + 4);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+      fprintf(stderr,"Vertex 4: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v3->v.x, v3->v.y, v3->v.z, v3->v.w, v3->v.u0, v3->v.v0, v3->v.u1, v3->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   /* Convert x/y to fixed point with 2 fractional bits (multiply by 4) */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v3->v.x * 4);
+   yy[2] = (GLint)(v3->v.y * 4);
+
+   /* Signed area term of triangle (v0, v1, v3); the two 0.25 factors undo
+    * the scaling by 4 applied to each coordinate.  ooa holds the area here
+    * and is inverted only after the cull test.
+    */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   /* NOTE(review): a zero area is not special-cased before this division */
+   ooa = 1.0 / ooa;
+
+   /* Index of the primary colour dword within the vertex */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* ---- v0 -> hardware vertex slot 1 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                            /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* ---- v1 -> hardware vertex slot 2 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                            /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* ---- v3 -> hardware vertex slot 3 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v3->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v3->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v3->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v3->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v3->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   /* One-over-area of the first triangle (v0, v1, v3) */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* Second triangle: v2 takes the place of v0; xx[1]/yy[1] (v1) and
+    * xx[2]/yy[2] (v3) are reused for the area.
+    */
+   xx[0] = (GLint)(v2->v.x * 4);
+   yy[0] = (GLint)(v2->v.y * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* ---- v2 -> hardware vertex slot 1 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* One-over-area of the second triangle (v2, v1, v3) */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* Exactly the allocated number of dwords must have been written */
+   assert(vbsiz == vbidx);
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+
+/*
+ * Emit a single triangle (v0, v1, v2) into a freshly allocated DMA
+ * vertex buffer.
+ *
+ * Both build variants compute the signed cross product of the triangle's
+ * edges in quarter-pixel units (2 fractional bits), drop the triangle if
+ * its sign disagrees with mmesa->backface_sign, and finish the buffer
+ * with the reciprocal area (ONE_OVER_AREA).
+ *
+ * Zero-area triangles are dropped before any DMA space is allocated:
+ * they would otherwise produce a division by zero and hand an infinite
+ * ONE_OVER_AREA value to the hardware.
+ */
+static __inline void mach64_draw_triangle( mach64ContextPtr mmesa,
+					   mach64VertexPtr v0,
+					   mach64VertexPtr v1,
+					   mach64VertexPtr v2 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* three vertices plus one trailing word for the one-over-area value;
+    * presumably COPY_VERTEX/COPY_VERTEX_OOA add the 1 or 2 extra words
+    * per vertex -- the assert below checks the total
+    */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+   }
+   
+   /* unpack the 16.16 packed coordinates: high half -> xx, low half -> yy */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+	   
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   
+   if ( mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign ))) ) {
+      /* cull triangle */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+
+   if ( a == 0 ) {
+      /* degenerate triangle: covers no area and 16.0 / a would be infinite */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Degenerate triangle dropped\n");
+      return;
+   }
+   
+   /* a is in (1/4 pixel)^2 units; 16 = 4 * 4 rescales it to pixels */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   assert( vb == vbchk );
+
+#if MACH64_PRINT_BUFFER
+   {
+      unsigned i;
+      fprintf(stderr, "tri:\n");
+      /* vb now points one past the end; the cast keeps %lx correct
+       * whatever the width of CARD32 is on this platform
+       */
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", (unsigned long) *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* per vertex: 1 header word, optional S/T, optional W/SPEC_ARGB,
+    * Z/ARGB/X_Y, optional 4-word secondary texture group; then 2 words
+    * for the ONE_OVER_AREA register write
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 3 + 2);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   /* convert window coordinates to fixed point with 2 fractional bits */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v2->v.x * 4);
+   yy[2] = (GLint)(v2->v.y * 4);
+
+   /* signed edge cross product, rescaled from quarter-pixel units */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull triangle */
+       if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+
+   if ( ooa == 0.0F ) {
+      /* degenerate triangle: covers no area and 1.0 / ooa would be infinite */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Degenerate triangle dropped\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   ooa = 1.0 / ooa;
+
+   /* the colour sits at ui[4] when the vertex carries W/specular, else ui[3] */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* every word accounted for in vbsiz must have been written */
+   assert(vbsiz == vbidx);
+
+#if MACH64_PRINT_BUFFER
+   {
+      unsigned i;
+      fprintf(stderr, "tri:\n");
+      /* cast keeps %lx correct whatever the width of CARD32 is */
+      for (i = 0; i < vbsiz; ++i)
+	 fprintf(stderr, "  %08lx\n", (unsigned long) *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+
+/*
+ * Emit a line from v0 to v1 as two triangles sharing an edge (four
+ * vertices written to the DMA buffer, each triangle followed by its own
+ * ONE_OVER_AREA value).
+ *
+ * The line is widened perpendicular to its major axis: X-major lines are
+ * offset in y, Y-major lines in x.
+ *
+ * Native path: the packed X/Y word of v0 and v1 is temporarily rewritten
+ * with the offset corner positions while the vertices are copied, and is
+ * restored before returning.
+ *
+ * Lines whose endpoints coincide after conversion to fixed point are
+ * dropped: they cover no area and would otherwise produce a division by
+ * zero.
+ */
+static __inline void mach64_draw_line( mach64ContextPtr mmesa,
+				     mach64VertexPtr v0,
+				     mach64VertexPtr v1 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint width = (GLint)(mmesa->glCtx->Line._Width * 2.0); /* 2 fractional bits for hardware */
+   GLfloat ooa;
+   GLuint *pxy0, *pxy1;
+   GLuint xy0old, xy0, xy1old, xy1;
+   const GLuint xyoffset = 9;
+   GLint x0, y0, x1, y1;
+   GLint dx, dy, ix, iy;
+   /* four vertices plus one one-over-area word per triangle */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+   }
+  
+   if( !width )
+      width = 1;	/* round to the nearest supported width */
+      
+   /* remember the original packed X/Y words so they can be restored */
+   pxy0 = &v0->ui[xyoffset];
+   xy0old = *pxy0;
+   xy0 = LE32_IN( &xy0old );
+   x0 = (GLshort)( xy0 >> 16 );
+   y0 = (GLshort)( xy0 & 0xffff );
+   
+   pxy1 = &v1->ui[xyoffset];
+   xy1old = *pxy1;
+   xy1 = LE32_IN( &xy1old );
+   x1 = (GLshort)( xy1 >> 16 );
+   y1 = (GLshort)( xy1 & 0xffff );
+   
+   if ( (dx = x1 - x0) < 0 ) {
+      dx = -dx;
+   }
+   if ( (dy = y1 - y0) < 0 ) {
+      dy = -dy;
+   }
+
+   if ( dx == 0 && dy == 0 ) {
+      /* zero-length line: nothing to draw, and the area below would be 0 */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Degenerate line dropped\n");
+      return;
+   }
+   
+   /* adjust vertices depending on line direction */
+   if ( dx > dy ) {
+      ix = 0;
+      iy = width;
+      ooa = 8.0 / ((x1 - x0) * width);
+   } else {
+      ix = width;
+      iy = 0;
+      ooa = 8.0 / ((y0 - y1) * width);
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   /* first triangle: (v0 - offset), (v1 - offset), (v0 + offset) */
+   LE32_OUT( pxy0, (( x0 - ix ) << 16) | (( y0 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy1, (( x1 - ix ) << 16) | (( y1 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   LE32_OUT( pxy0, (( x0 + ix ) << 16) | (( y0 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* the second triangle has the opposite winding */
+   ooa = -ooa;
+   
+   /* second triangle: replace vertex slot 1 with (v1 + offset) */
+   LE32_OUT( pxy1, (( x1 + ix ) << 16) | (( y1 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v1, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   assert( vb == vbchk );
+
+   /* put the caller's vertex coordinates back */
+   *pxy0 = xy0old;
+   *pxy1 = xy1old;
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   float width = 1.0; /* Only support 1 pix lines now */
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* per vertex: 1 header word, optional S/T, optional W/SPEC_ARGB,
+    * Z/ARGB/X_Y, optional 4-word secondary texture group; four vertices,
+    * then 2 words for each of the two ONE_OVER_AREA register writes
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 4 + 4);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+   
+   GLfloat hw, dx, dy, ix, iy;
+   GLfloat x0 = v0->v.x;
+   GLfloat y0 = v0->v.y;
+   GLfloat x1 = v1->v.x;
+   GLfloat y1 = v1->v.y;
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w);
+   }
+
+   /* half width of the line, clamped up to half a pixel */
+   hw = 0.5F * width;
+   if (hw > 0.1F && hw < 0.5F) {
+      hw = 0.5F;
+   }
+
+   /* adjust vertices depending on line direction */
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+   if (dx * dx > dy * dy) {
+      /* X-major line */
+      ix = 0.0F;
+      iy = hw;
+      if (x1 < x0) {
+         x0 += 0.5F;
+         x1 += 0.5F;
+      }
+      y0 -= 0.5F;
+      y1 -= 0.5F;
+   }
+   else {
+      /* Y-major line */
+      ix = hw;
+      iy = 0.0F;
+      if (y1 > y0) {
+         y0 -= 0.5F;
+         y1 -= 0.5F;
+      }
+      x0 += 0.5F;
+      x1 += 0.5F;
+   }
+
+   /* corners of the first triangle in fixed point (2 fractional bits) */
+   xx[0] = (GLint)((x0 - ix) * 4);
+   yy[0] = (GLint)((y0 - iy) * 4);
+
+   xx[1] = (GLint)((x1 - ix) * 4);
+   yy[1] = (GLint)((y1 - iy) * 4);
+
+   xx[2] = (GLint)((x0 + ix) * 4);
+   yy[2] = (GLint)((y0 + iy) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   /* NOTE(review): this applies the polygon face test to a line, which the
+    * native path above does not do -- confirm whether it is intended.
+    */
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull line */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Line culled\n");
+      return;
+   }
+
+   if ( ooa == 0.0F ) {
+      /* endpoints coincide along the major axis after truncation:
+       * nothing to draw and 1.0 / ooa would be infinite
+       */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Degenerate line dropped\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   ooa = 1.0 / ooa;
+
+   /* the colour sits at ui[4] when the vertex carries W/specular, else ui[3] */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes (these words are counted in vbsiz) */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* second triangle: vertex slot 1 becomes (v1 + offset) */
+   xx[0] = (GLint)((x1 + ix) * 4);
+   yy[0] = (GLint)((y1 + iy) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* every word accounted for in vbsiz must have been written */
+   assert(vbsiz == vbidx);
+#endif
+}
+
+/*
+ * Emit a point at v0 as a screen-aligned square built from two triangles
+ * (four corner vertices written to the DMA buffer, each triangle followed
+ * by its own ONE_OVER_AREA value).  All four corners carry v0's colour,
+ * depth and texture coordinates; only the X/Y word differs.
+ *
+ * Native path: the packed X/Y word of v0 is temporarily rewritten with
+ * each corner position while the vertex is copied, and is restored
+ * before returning.
+ */
+static __inline void mach64_draw_point( mach64ContextPtr mmesa,
+				      mach64VertexPtr v0 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint sz = (GLint)(mmesa->glCtx->Point._Size * 2.0); /* 2 fractional bits for hardware */
+   GLfloat ooa;
+   GLuint *pxy;
+   GLuint xyold, xy;
+   const GLuint xyoffset = 9;
+   GLint x, y;
+   /* four vertices plus one one-over-area word per triangle */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+   }
+  
+   if( !sz )
+      sz = 1;	/* round to the nearest supported size */
+      
+   /* remember the original packed X/Y word so it can be restored */
+   pxy = &v0->ui[xyoffset];
+   xyold = *pxy;
+   xy = LE32_IN( &xyold );
+   x = (GLshort)( xy >> 16 );
+   y = (GLshort)( xy & 0xffff );
+   
+   /* sz is never zero here, so the division is safe */
+   ooa = 4.0 / (sz * sz);
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   /* first triangle: (-,-), (+,-), (-,+) corners */
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 2 );
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* the second triangle has the opposite winding */
+   ooa = -ooa;
+   
+   /* second triangle: replace vertex slot 1 with the (+,+) corner */
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   assert( vb == vbchk );
+
+   /* put the caller's vertex coordinates back */
+   *pxy = xyold;
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size; 
+   GLint coloridx;
+   float sz = 1.0; /* Only support 1 pix points now */
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* per vertex: 1 header word, optional S/T, optional W/SPEC_ARGB,
+    * Z/ARGB/X_Y, optional 4-word secondary texture group; four vertices,
+    * then 2 words for each of the two ONE_OVER_AREA register writes
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 4 + 4);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+   
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   /* corners of the first triangle in fixed point (2 fractional bits) */
+   xx[0] = (GLint)((v0->v.x - sz) * 4);
+   yy[0] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[1] = (GLint)((v0->v.x + sz) * 4);
+   yy[1] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[2] = (GLint)((v0->v.x - sz) * 4);
+   yy[2] = (GLint)((v0->v.y + sz) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   /* NOTE(review): this applies the polygon face test to a point, which the
+    * native path above does not do -- confirm whether it is intended.
+    */
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Point culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   ooa = 1.0 / ooa;
+
+   /* the colour sits at ui[4] when the vertex carries W/specular, else ui[3] */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes (these words are counted in vbsiz) */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* second triangle: vertex slot 1 becomes the (+,+) corner */
+   xx[0] = (GLint)((v0->v.x + sz) * 4);
+   yy[0] = (GLint)((v0->v.y + sz) * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* every word accounted for in vbsiz must have been written */
+   assert(vbsiz == vbidx);
+#endif
+}
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+/* Draw one triangle: directly via mach64_draw_triangle(), or through the
+ * context's draw_tri hook when this template instance is a fallback.
+ */
+#define TRI( v0, v1, v2 )				\
+do {							\
+   if (!(DO_FALLBACK)) {				\
+      mach64_draw_triangle( mmesa, v0, v1, v2 );	\
+   } else {						\
+      mmesa->draw_tri( mmesa, v0, v1, v2 );		\
+   }							\
+} while (0)
+
+/* Draw one quad: directly via mach64_draw_quad(), or as the two triangles
+ * (v0,v1,v3) and (v1,v2,v3) through the context's draw_tri hook when this
+ * template instance is a fallback.
+ */
+#define QUAD( v0, v1, v2, v3 )				\
+do {							\
+   if (!(DO_FALLBACK)) {				\
+      mach64_draw_quad( mmesa, v0, v1, v2, v3 );	\
+   } else {						\
+      mmesa->draw_tri( mmesa, v0, v1, v3 );		\
+      mmesa->draw_tri( mmesa, v1, v2, v3 );		\
+   }							\
+} while (0)
+
+/* Draw one line: directly via mach64_draw_line(), or through the context's
+ * draw_line hook when this template instance is a fallback.
+ */
+#define LINE( a, b )				\
+do {						\
+   if (!(DO_FALLBACK)) {			\
+      mach64_draw_line( mmesa, a, b );		\
+   } else {					\
+      mmesa->draw_line( mmesa, a, b );		\
+   }						\
+} while (0)
+
+/* Draw one point: directly via mach64_draw_point(), or through the
+ * context's draw_point hook when this template instance is a fallback.
+ */
+#define POINT( a )				\
+do {						\
+   if (!(DO_FALLBACK)) {			\
+      mach64_draw_point( mmesa, a );		\
+   } else {					\
+      mmesa->draw_point( mmesa, a );		\
+   }						\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+/* Render-state bits.  Each combination of the four bits below selects one
+ * entry of rast_tab; MACH64_MAX_TRIFUNC (0x10) is the number of
+ * combinations and therefore the size of the table.
+ */
+#define MACH64_OFFSET_BIT	0x01
+#define MACH64_TWOSIDE_BIT	0x02
+#define MACH64_UNFILLED_BIT	0x04
+#define MACH64_FALLBACK_BIT	0x08
+#define MACH64_MAX_TRIFUNC	0x10
+
+/* One set of point/line/triangle/quad functions per render-state index. */
+static struct {
+   points_func	        points;
+   line_func		line;
+   triangle_func	triangle;
+   quad_func		quad;
+} rast_tab[MACH64_MAX_TRIFUNC];
+
+
+/* Feature switches derived from IND, the render-state index of the template
+ * instance currently being built (IND is defined before each inclusion).
+ */
+#define DO_FALLBACK (IND & MACH64_FALLBACK_BIT)
+#define DO_OFFSET   (IND & MACH64_OFFSET_BIT)
+#define DO_UNFILLED (IND & MACH64_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & MACH64_TWOSIDE_BIT)
+/* Fixed switches: no flat-shading variant is built here (DO_FLAT is 0);
+ * every primitive kind is enabled.
+ */
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+/* Capability flags, vertex type and table name -- presumably consumed by
+ * the tnl_dd templates included further down in this file.
+ */
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX mach64Vertex
+#define TAB rast_tab
+
+/* Vertex accessors used by the render templates.  Two variants exist,
+ * selected by MACH64_NATIVE_VTXFMT: the native variant reads and writes
+ * packed hardware-format words through ui[] at the offsets declared in
+ * LOCAL_VARS, the other variant uses the v.x / v.y / v.z fields.
+ * Macro arguments are parenthesised in both variants so that arbitrary
+ * expressions can be passed safely.
+ */
+#if MACH64_NATIVE_VTXFMT
+
+/* #define DEPTH_SCALE 65536.0 */
+#define DEPTH_SCALE 1
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+/* X and Y are signed 16-bit values with 2 fractional bits, packed into
+ * the word at ui[xyoffset].
+ * NOTE(review): VERT_X reads the low half and VERT_Y the high half, while
+ * the mach64_draw_* functions in this file take x from the high half and
+ * y from the low half.  AREA_IS_CCW below has the opposite sense to the
+ * non-native variant, which may compensate -- confirm before changing.
+ */
+#define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0)
+#define VERT_Y(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) >> 16) / 4.0)
+#define VERT_Z(_v) ((GLfloat) LE32_IN( &(_v)->ui[zoffset] ))
+#define INSANE_VERTICES
+#define VERT_SET_Z(_v,val) LE32_OUT( &(_v)->ui[zoffset], (GLuint)(val) )
+#define VERT_Z_ADD(_v,val) LE32_OUT( &(_v)->ui[zoffset], LE32_IN( &(_v)->ui[zoffset] ) + (GLuint)(val) )
+#define AREA_IS_CCW( a ) ((a) < 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e)<<mmesa->vertex_stride_shift))
+
+/* Copy a 4-component colour, swapping components 0 and 2. */
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   (dst)[0] = (src)[2];				\
+   (dst)[1] = (src)[1];				\
+   (dst)[2] = (src)[0];				\
+   (dst)[3] = (src)[3];				\
+} while (0)
+
+/* Copy a 3-component specular colour, swapping components 0 and 2. */
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   (dst)[0] = (src)[2];				\
+   (dst)[1] = (src)[1];				\
+   (dst)[2] = (src)[0];				\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( (v)->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) (v0)->ui[coloroffset] = (v1)->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+/* The specular accessors do nothing unless havespec is set. */
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( (v)->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V( (v0)->ub4[specoffset], (v1)->ub4[specoffset] )
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx]
+
+/* Locals expected by the accessors above; the (void) casts silence
+ * unused-variable warnings in template instances that do not need them.
+ */
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n], spec[n];					\
+   GLuint vertex_size = mmesa->vertex_size;			\
+   const GLuint xyoffset = 9;					\
+   const GLuint coloroffset = 8;				\
+   const GLuint zoffset = 7;					\
+   const GLuint specoffset = 6;					\
+   GLboolean havespec = vertex_size >= 4 ? 1 : 0;		\
+   (void) color; (void) spec; (void) vertex_size; 		\
+   (void) xyoffset; (void) coloroffset; (void) zoffset;		\
+   (void) specoffset; (void) havespec;
+
+#else
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) (_v)->v.x
+#define VERT_Y(_v) (_v)->v.y
+#define VERT_Z(_v) (_v)->v.z
+#define AREA_IS_CCW( a ) ((a) > 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e)<<mmesa->vertex_stride_shift))
+
+/* Copy a 4-component colour, swapping components 0 and 2. */
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   (dst)[0] = (src)[2];				\
+   (dst)[1] = (src)[1];				\
+   (dst)[2] = (src)[0];				\
+   (dst)[3] = (src)[3];				\
+} while (0)
+
+/* Copy a 3-component specular colour, swapping components 0 and 2. */
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   (dst)[0] = (src)[2];				\
+   (dst)[1] = (src)[1];				\
+   (dst)[2] = (src)[0];				\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( (v)->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) (v0)->ui[coloroffset] = (v1)->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+/* The specular colour lives at index 5; the accessors do nothing unless
+ * havespec is set.
+ */
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( (v)->ub4[5], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V((v0)->ub4[5], (v1)->ub4[5])
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
+
+/* Locals expected by the accessors above: a 4-word vertex has its colour
+ * at index 3 and no specular colour, larger vertices have the colour at
+ * index 4 and a specular colour.
+ */
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n], spec[n];					\
+   GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (mmesa->vertex_size == 4 ? 0 : 1);	\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+#endif
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+/* RASTERIZE(x): switch the hardware primitive to hw_prim[x] unless it is
+ * already the current one.
+ * RENDER_PRIMITIVE: the GL primitive recorded in the context by
+ * mach64RenderPrimitive().
+ * IND and TAG parameterise the template included below; with TAG(x) x the
+ * generated names carry no suffix.
+ */
+#define RASTERIZE(x) if (mmesa->hw_primitive != hw_prim[x]) \
+                        mach64RasterPrimitive( ctx, hw_prim[x] )
+#define RENDER_PRIMITIVE mmesa->render_primitive
+#define IND MACH64_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+/* The triangle template is included once for each of the sixteen
+ * combinations of the TWOSIDE, OFFSET, UNFILLED and FALLBACK bits.  IND
+ * holds the bits for that instantiation and TAG() appends the matching
+ * suffix to every generated name (for example init_twoside_offset).
+ * IND and TAG are redefined before each include without an #undef here, so
+ * the template is presumably expected to #undef them itself -- confirm in
+ * tnl_dd/t_dd_tritmp.h.
+ */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT| \
+	     MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Run the init function of every triangle-template instantiation, one per
+ * combination of the TWOSIDE, OFFSET, UNFILLED and FALLBACK bits.
+ * NOTE(review): each init presumably fills the rast_tab[] entry for its
+ * index -- confirm in tnl_dd/t_dd_tritmp.h.
+ */
+static void init_rast_tab( void )
+{
+   init();
+   init_offset();
+   init_twoside();
+   init_twoside_offset();
+   init_unfilled();
+   init_offset_unfilled();
+   init_twoside_unfilled();
+   init_twoside_offset_unfilled();
+   init_fallback();
+   init_offset_fallback();
+   init_twoside_fallback();
+   init_twoside_offset_fallback();
+   init_unfilled_fallback();
+   init_offset_unfilled_fallback();
+   init_twoside_unfilled_fallback();
+   init_twoside_offset_unfilled_fallback();
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* Software path for a single triangle.  Reached only while accelerated and
+ * unaccelerated primitives are being mixed, and then only for the
+ * unaccelerated ones: the three hardware vertices are translated to
+ * swrast vertices and handed to the software rasterizer.
+ */
+static void
+mach64_fallback_tri( mach64ContextPtr mmesa,
+		     mach64Vertex *v0,
+		     mach64Vertex *v1,
+		     mach64Vertex *v2 )
+{
+   GLcontext *gl = mmesa->glCtx;
+   SWvertex sw0, sw1, sw2;
+
+   mach64_translate_vertex( gl, v0, &sw0 );
+   mach64_translate_vertex( gl, v1, &sw1 );
+   mach64_translate_vertex( gl, v2, &sw2 );
+
+   _swrast_Triangle( gl, &sw0, &sw1, &sw2 );
+}
+
+
+/* Software path for a single line: translate both hardware vertices to
+ * swrast vertices and draw the line with the software rasterizer.
+ */
+static void
+mach64_fallback_line( mach64ContextPtr mmesa,
+		    mach64Vertex *v0,
+		    mach64Vertex *v1 )
+{
+   GLcontext *gl = mmesa->glCtx;
+   SWvertex sw0, sw1;
+
+   mach64_translate_vertex( gl, v0, &sw0 );
+   mach64_translate_vertex( gl, v1, &sw1 );
+
+   _swrast_Line( gl, &sw0, &sw1 );
+}
+
+
+/* Software path for a single point: translate the hardware vertex to a
+ * swrast vertex and draw it with the software rasterizer.
+ */
+static void
+mach64_fallback_point( mach64ContextPtr mmesa,
+		     mach64Vertex *v0 )
+{
+   GLcontext *gl = mmesa->glCtx;
+   SWvertex sw0;
+
+   mach64_translate_vertex( gl, v0, &sw0 );
+   _swrast_Point( gl, &sw0 );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+/* VERT(x): address of vertex x.  The vertex store is addressed as bytes and
+ * the index is scaled by the per-vertex stride shift.  The whole expansion
+ * is parenthesised so the result can be used inside a larger expression
+ * (member access, further casts) without precedence surprises.
+ */
+#define VERT(x) ((mach64Vertex *)(mach64verts + ((x) << shift)))
+/* Primitive emitters used by the render template; each forwards to the
+ * corresponding mach64_draw_* routine.
+ */
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      mach64_draw_point( mmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   mach64_draw_line( mmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   mach64_draw_triangle( mmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   mach64_draw_quad( mmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+/* INIT(x): announce GL primitive x to the driver; the fprintf is a
+ * compiled-out trace hook.
+ */
+#define INIT(x) do {					\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	\
+   mach64RenderPrimitive( ctx, x );			\
+} while (0)
+/* Replace the triangle-template LOCAL_VARS with the locals the macros
+ * above rely on: the context, the stride shift, the vertex store as bytes
+ * and the element list of the current vertex buffer.
+ */
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+    const GLuint shift = mmesa->vertex_stride_shift;		\
+    const char *mach64verts = (char *)mmesa->verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE
+#define RESET_OCCLUSION
+/* NOTE(review): PRESERVE_VB_DEFS presumably keeps the definitions above
+ * alive across the first include so they can be reused for the second --
+ * confirm in tnl/t_vb_rendertmp.h.
+ */
+#define PRESERVE_VB_DEFS
+/* First pass: vertices addressed directly by index. */
+#define ELT(x) (x)
+#define TAG(x) mach64_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+/* Second pass: vertices addressed through the element list. */
+#define TAG(x) mach64_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Render clipped primitives                       */
+/**********************************************************************/
+
+/* Draw a clipped polygon through the generic element render path.
+ *
+ * The vertex buffer's element list is pointed at 'elts' for the duration of
+ * the GL_POLYGON render call and then put back.  If the primitive that was
+ * current on entry is not GL_POLYGON, the driver is notified of it again
+ * afterwards.
+ */
+static void mach64RenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				     GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *vbuf = &tnl->vb;
+   const GLuint entry_prim = MACH64_CONTEXT( ctx )->render_primitive;
+   GLuint *saved_elts = vbuf->Elts;
+
+   /* Render the new vertices as an unclipped polygon. */
+   vbuf->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   vbuf->Elts = saved_elts;
+
+   /* Nothing to restore when a polygon was already being rendered. */
+   if (entry_prim == GL_POLYGON)
+      return;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, entry_prim );
+}
+
+/* Draw a clipped line by forwarding to the currently installed line
+ * function of the TNL render table.
+ */
+static void mach64RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+#if MACH64_NATIVE_VTXFMT
+/* Emit a clipped polygon straight into the DMA buffer as a triangle fan
+ * (native vertex format).
+ *
+ * The first triangle is built from elts[1], elts[2] and elts[0]; every
+ * further element replaces vertex slot 1 or 2 in turn while elts[0] stays
+ * in slot 3.  For each triangle the signed area 'a' is computed from the
+ * packed x/y coordinates and 16.0 / a is written after the vertex data
+ * (ooa is presumably "one over area" -- confirm against the register
+ * documentation).  The polygon is dropped when the sign of the first
+ * triangle's area disagrees with mmesa->backface_sign.
+ *
+ * ctx  - GL context
+ * elts - indices of the polygon's vertices in the driver vertex store
+ * n    - number of entries in elts; fewer than three draws nothing
+ */
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   /* Float and raw-dword views of the same value, so the float can be
+    * written out as a dword without casting between incompatible pointer
+    * types (which breaks the C aliasing rules).
+    */
+   union {
+      GLfloat f;
+      CARD32 u;
+   } ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   unsigned vbsiz;
+   CARD32 *vb, *vbchk;
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts;
+   const GLuint shift = mmesa->vertex_stride_shift;
+   mach64VertexPtr v0, v1, v2;
+   GLuint i;
+
+   /* A polygon needs at least three vertices; this also keeps (n-2) below
+    * from wrapping around.
+    */
+   if (n < 3)
+      return;
+
+   /* Per vertex: its data plus one header dword (two when vertsize > 7);
+    * plus one ooa dword per triangle, i.e. n-2 in total.
+    */
+   vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2);
+   
+   v0 = (mach64VertexPtr)VERT(elts[1]);
+   v1 = (mach64VertexPtr)VERT(elts[2]);
+   v2 = (mach64VertexPtr)VERT(elts[0]);
+      
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+	   
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+
+   if ( (mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign )))) ) {
+      /* cull polygon */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Polygon culled\n");
+      return;
+   }
+   
+   ooa.f = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, ooa.u );
+
+   /* Remaining vertices alternate between slot 1 and slot 2. */
+   i = 3;
+   while (1) {
+      if (i >= n)
+	 break;
+      v0 = (mach64VertexPtr)VERT(elts[i]);
+      i++;
+
+      xy = LE32_IN( &v0->ui[xyoffset] );
+      xx[0] = (GLshort)( xy >> 16 );
+      yy[0] = (GLshort)( xy & 0xffff );
+	      
+      a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+	  (yy[0] - yy[2]) * (xx[1] - xx[2]);
+      ooa.f = 16.0 / a;
+   
+      COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
+      LE32_OUT( vb++, ooa.u );
+      
+      if (i >= n)
+	 break;
+      v1 = (mach64VertexPtr)VERT(elts[i]);
+      i++;
+
+      xy = LE32_IN( &v1->ui[xyoffset] );
+      xx[1] = (GLshort)( xy >> 16 );
+      yy[1] = (GLshort)( xy & 0xffff );
+	      
+      a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+	  (yy[0] - yy[2]) * (xx[1] - xx[2]);
+      ooa.f = 16.0 / a;
+   
+      COPY_VERTEX_OOA( vb, vertsize, v1, 2 );
+      LE32_OUT( vb++, ooa.u );
+   }
+
+   /* Everything reserved must have been written, no more and no less. */
+   assert( vb == vbchk );
+}
+#else
+/* Draw a clipped polygon as a triangle fan (non-native vertex format).
+ *
+ * For every i from 2 to n-1 one triangle is drawn from elts[i-1], elts[i]
+ * and the shared vertex elts[0], in that argument order.
+ */
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   const GLuint shift = mmesa->vertex_stride_shift;
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts;
+   mach64VertexPtr pivot = (mach64VertexPtr)VERT(elts[0]);
+   int i = 2;
+
+   while (i < n) {
+      mach64_draw_triangle( mmesa, VERT(elts[i-1]), VERT(elts[i]), pivot );
+      i++;
+   }
+}
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* State-change bits that make mach64RunPipeline() re-run
+ * mach64ChooseRenderState().  The definition ends on its last line; there
+ * is no trailing line continuation that could swallow the following line.
+ */
+#define _MACH64_NEW_RENDER_STATE (_DD_NEW_POINT_SMOOTH |	\
+			          _DD_NEW_LINE_SMOOTH |		\
+			          _DD_NEW_LINE_STIPPLE |	\
+			          _DD_NEW_TRI_SMOOTH |		\
+			          _DD_NEW_TRI_STIPPLE |		\
+			          _NEW_POLYGONSTIPPLE |		\
+			          _DD_NEW_TRI_UNFILLED |	\
+			          _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			          _DD_NEW_TRI_OFFSET)
+
+/* Capability flags that send points, lines or triangles to the software
+ * rasterizer, and their union.
+ */
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_SMOOTH|DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH|DD_TRI_STIPPLE)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+/* Capability flags that select a non-default triangle template variant. */
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+
+
+/* Pick the render functions that match the current triangle capabilities.
+ *
+ * A render index is built from ctx->_TriangleCaps.  When any raster or
+ * fallback flag is set, the per-primitive draw hooks are first pointed at
+ * the hardware routines and then overridden with the software fallbacks
+ * for the primitive types that need them.  If the resulting index differs
+ * from the one in use, the TNL render entry points are reloaded from
+ * rast_tab[]; index 0 additionally installs the driver's own primitive
+ * tables and fast clipped-polygon path, any other index the generic ones.
+ */
+static void mach64ChooseRenderState(GLcontext *ctx)
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+   TNLcontext *tnl;
+
+   if (caps & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+      /* Hardware routines unless overridden below. */
+      mmesa->draw_point = mach64_draw_point;
+      mmesa->draw_line = mach64_draw_line;
+      mmesa->draw_tri = mach64_draw_triangle;
+
+      if (caps & DD_TRI_LIGHT_TWOSIDE)
+	 index |= MACH64_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)
+	 index |= MACH64_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)
+	 index |= MACH64_UNFILLED_BIT;
+
+      /* Hook in fallbacks for specific primitives. */
+      if (caps & ANY_FALLBACK_FLAGS) {
+	 index |= MACH64_FALLBACK_BIT;
+
+	 if (caps & POINT_FALLBACK)
+	    mmesa->draw_point = mach64_fallback_point;
+	 if (caps & LINE_FALLBACK)
+	    mmesa->draw_line = mach64_fallback_line;
+	 if (caps & TRI_FALLBACK)
+	    mmesa->draw_tri = mach64_fallback_tri;
+      }
+   }
+
+   /* Nothing to reload when the selection has not changed. */
+   if (index == mmesa->RenderIndex)
+      return;
+
+   tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+   if (index != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = mach64RenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = mach64RenderClippedPoly;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = mach64_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = mach64_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+      tnl->Driver.Render.ClippedPolygon = mach64FastRenderClippedPoly;
+   }
+
+   mmesa->RenderIndex = index;
+}
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+/* TNL RunPipeline hook: bring driver state up to date, then run the
+ * pipeline.
+ *
+ * Pending hardware state is flushed first.  While no fallback is active,
+ * any accumulated GL state changes re-select the vertex and/or render
+ * state and are then cleared.
+ */
+static void mach64RunPipeline( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if (mmesa->new_state != 0) {
+      mach64DDUpdateHWState( ctx );
+   }
+
+   if (mmesa->Fallback == 0 && mmesa->NewGLState != 0) {
+      if ((mmesa->NewGLState & _MACH64_NEW_VERTEX_STATE) != 0) {
+	 mach64ChooseVertexState( ctx );
+      }
+      if ((mmesa->NewGLState & _MACH64_NEW_RENDER_STATE) != 0) {
+	 mach64ChooseRenderState( ctx );
+      }
+      mmesa->NewGLState = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* Called when Mesa switches between rendering triangle primitives (such
+ * as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc.) and lines, points and
+ * bitmaps.
+ *
+ * The context state is always marked for re-upload.  When the hardware
+ * primitive actually changes, queued vertices are flushed before the new
+ * primitive is recorded.
+ */
+
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
+
+   if (mmesa->hw_primitive == hwprim)
+      return;
+
+   FLUSH_BATCH( mmesa );
+   mmesa->hw_primitive = hwprim;
+}
+
+/* TNL PrimitiveNotify hook: record the GL primitive about to be rendered
+ * and select the matching hardware primitive.
+ *
+ * For triangle-class primitives in unfilled mode only the GL primitive is
+ * recorded; the hardware primitive is left unchanged here.
+ */
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLboolean unfilled_tri =
+      (prim >= GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED));
+
+   mmesa->render_primitive = prim;
+
+   if (!unfilled_tri)
+      mach64RasterPrimitive( ctx, hw_prim[prim] );
+}
+
+
+/* TNL Render.Start hook: validates texture coordinate sizes before
+ * rendering begins.
+ */
+static void mach64RenderStart( GLcontext *ctx )
+{
+   /* Check for projective texturing.  Make sure all texcoord
+    * pointers point to something.  (fix in mesa?)
+    */
+   mach64CheckTexSizes( ctx );
+}
+
+/* TNL Render.Finish hook: flush the software rasterizer when the render
+ * state in use includes the fallback bit.
+ */
+static void mach64RenderFinish( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ((mmesa->RenderIndex & MACH64_FALLBACK_BIT) == 0)
+      return;
+
+   _swrast_flush( ctx );
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+/* Enter or leave software rasterization for one fallback reason.
+ *
+ * ctx  - GL context
+ * bit  - fallback reason; a bit in mmesa->Fallback
+ * mode - non-zero sets the reason, zero clears it
+ *
+ * Setting the first reason flushes queued hardware vertices, wakes up the
+ * software setup module and invalidates the render index.  Clearing a
+ * reason that was the only one set flushes the software rasterizer,
+ * reinstalls the driver's render hooks and marks vertex and render state
+ * for re-selection.
+ */
+void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint oldfallback = mmesa->Fallback;
+
+   if (mode) {
+      /* 'bit' is unsigned, so it is printed with %u. */
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"Set Fallback: %u\n", bit);
+      mmesa->Fallback |= bit;
+      if (oldfallback == 0) {
+	 FLUSH_BATCH( mmesa );
+	 _swsetup_Wakeup( ctx );
+	 /* No valid render index while in fallback. */
+	 mmesa->RenderIndex = ~0;
+      }
+   }
+   else {
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"Clear Fallback: %u\n", bit);
+      mmesa->Fallback &= ~bit;
+      /* Only act when the reason being cleared was the last one set. */
+      if (oldfallback == bit) {
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = mach64RenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+	 tnl->Driver.Render.Finish = mach64RenderFinish;
+	 tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+	 mmesa->NewGLState |= (_MACH64_NEW_RENDER_STATE|
+			       _MACH64_NEW_VERTEX_STATE);
+      }
+   }
+}
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+/* Install the driver's render hooks into the context's TNL module.
+ *
+ * The rasterization function table is built on the first call only; the
+ * hooks are installed on every call.
+ */
+void mach64InitTriFuncs( GLcontext *ctx )
+{
+   static int need_tab_init = 1;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (need_tab_init) {
+      init_rast_tab();
+      need_tab_init = 0;
+   }
+
+   tnl->Driver.RunPipeline = mach64RunPipeline;
+
+   tnl->Driver.Render.Start = mach64RenderStart;
+   tnl->Driver.Render.Finish = mach64RenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+
+/*     mach64Fallback( ctx, 0x100000, 1 ); */
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_tris.h b/src/mesa/drivers/dri/mach64/mach64_tris.h
new file mode 100644
index 0000000000..208703289d
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tris.h
@@ -0,0 +1,43 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TRIS_H__
+#define __MACH64_TRIS_H__
+
+#include "mtypes.h"
+
+/* Install the mach64 render hooks into the context's TNL module. */
+extern void mach64InitTriFuncs( GLcontext *ctx );
+
+/* Set (mode non-zero) or clear (mode zero) the fallback reason 'bit'. */
+extern void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( mmesa, bit, mode ) mach64Fallback( (mmesa)->glCtx, bit, mode )
+
+#endif /* __MACH64_TRIS_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_vb.c b/src/mesa/drivers/dri/mach64/mach64_vb.c
new file mode 100644
index 0000000000..8a132b4f7b
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vb.c
@@ -0,0 +1,644 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "macros.h"
+#include "colormac.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_state.h"
+
+
+/* Vertex component bits.  An OR of these forms the index into setup_tab[];
+ * all seven bits together are 0x7f, so MACH64_MAX_SETUP (0x80) is the
+ * table size.
+ */
+#define MACH64_TEX1_BIT       0x1
+#define MACH64_TEX0_BIT       0x2
+#define MACH64_RGBA_BIT       0x4
+#define MACH64_SPEC_BIT       0x8
+#define MACH64_FOG_BIT        0x10
+#define MACH64_XYZW_BIT       0x20
+#define MACH64_PTEX_BIT       0x40
+#define MACH64_MAX_SETUP      0x80
+
+/* One entry per component combination: the vertex emit, interpolate and
+ * provoking-vertex copy functions, the texture size check, and the vertex
+ * size, stride shift and format belonging to that combination.
+ */
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   interp_func		interp;
+   copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;
+   GLuint               vertex_stride_shift;
+   GLuint               vertex_format;
+} setup_tab[MACH64_MAX_SETUP];
+
+/* Vertex format identifiers; the formats defined as 0 correspond to the
+ * HAVE_*_VERTICES switches that are turned off below.
+ */
+#define TINY_VERTEX_FORMAT      1
+#define NOTEX_VERTEX_FORMAT     2
+#define TEX0_VERTEX_FORMAT      3
+#define TEX1_VERTEX_FORMAT      4
+#define PROJ_TEX1_VERTEX_FORMAT 0
+#define TEX2_VERTEX_FORMAT      0
+#define TEX3_VERTEX_FORMAT      0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+/* Component tests evaluated against the IND of each template
+ * instantiation; texture units 2 and 3 are never enabled.
+ */
+#define DO_XYZW (IND & MACH64_XYZW_BIT)
+#define DO_RGBA (IND & MACH64_RGBA_BIT)
+#define DO_SPEC (IND & MACH64_SPEC_BIT)
+#define DO_FOG  (IND & MACH64_FOG_BIT)
+#define DO_TEX0 (IND & MACH64_TEX0_BIT)
+#define DO_TEX1 (IND & MACH64_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & MACH64_PTEX_BIT)
+
+/* Hooks that map the template's generic names onto the mach64 context. */
+#define VERTEX mach64Vertex
+#define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+#define GET_VIEWPORT_MAT() mmesa->hw_viewport
+#define GET_TEXSOURCE(n)  mmesa->tmu_source[n]
+#define GET_VERTEX_FORMAT() mmesa->vertex_format
+#define GET_VERTEX_STORE() mmesa->verts
+#define GET_VERTEX_STRIDE_SHIFT() mmesa->vertex_stride_shift
+
+/* Capability switches consumed by the vertex templates. */
+#define HAVE_HW_VIEWPORT    0
+#define HAVE_HW_DIVIDE      0
+#define HAVE_RGBA_COLOR     0
+#define HAVE_TINY_VERTICES  1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  0
+
+/* Declares dx, dy and sz for the UNVIEWPORT_* macros: dx removes the
+ * drawable x origin and sub-pixel bias, dy is the drawable height plus the
+ * y origin and bias, and sz is the reciprocal of the depth scale.  No
+ * trailing semicolon; the user supplies it.
+ */
+#define UNVIEWPORT_VARS						\
+   const GLfloat dx = - (GLfloat)mmesa->drawX - SUBPIXEL_X;	\
+   const GLfloat dy = (mmesa->driDrawable->h +			\
+		       (GLfloat)mmesa->drawY  + SUBPIXEL_Y);	\
+   const GLfloat sz = 1.0 / mmesa->depth_scale
+
+#if MACH64_NATIVE_VTXFMT
+/* Native format: x and y are divided by 4.0, z is shifted right by 15. */
+#define UNVIEWPORT_X(x)  (  ((GLfloat)(x) / 4.0)  +  dx)
+#define UNVIEWPORT_Y(y)  (- ((GLfloat)(y) / 4.0)  +  dy)
+#define UNVIEWPORT_Z(z)  ((GLfloat)((z) >> 15)  *  sz)
+/* Expansions are fully parenthesised and carry no trailing semicolon. */
+#else
+/* Float format: the stored coordinates are used directly. */
+#define UNVIEWPORT_X(x)  (  (x)  +  dx)
+#define UNVIEWPORT_Y(y)  (- (y)  +  dy)
+#define UNVIEWPORT_Z(z)  (  (z)  *  sz)
+/* All of these rely on dx, dy and sz declared by UNVIEWPORT_VARS. */
+#endif
+
+/* Raise the texture fallback on the current context. */
+#define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1)
+
+/* Names of the driver's float colour import routines. */
+#define IMPORT_FLOAT_COLORS mach64_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors
+
+/* Interpolation and provoking-vertex copy functions of the setup entry
+ * currently selected by mmesa->SetupIndex.
+ */
+#define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+/* Both branches prefix the generated names with mach64_; only the file
+ * that supplies the implementation differs.
+ */
+#if MACH64_NATIVE_VTXFMT
+
+#define TAG(x) mach64_##x
+#include "mach64_native_vb.c"
+
+#else
+
+#define TAG(x) mach64_##x
+#include "tnl_dd/t_dd_vb.c"
+
+#endif
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+
+#if MACH64_NATIVE_VTXFMT
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_native_vbtmp.h"
+
+#else
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_vbtmp.h"
+
+#endif
+
+static void init_setup_tab( void )
+{
+   /* Runs the init_<tag>() routine of every template instantiation.  The
+    * tag letters mirror the IND bits: w = XYZW, g = RGBA, s = SPEC,
+    * f = FOG, t0/t1 = TEX0/TEX1, p = PTEX.  Call order is unchanged.
+    */
+
+   /* Position + colour, no fog. */
+   init_wg();       init_wgs();
+   init_wgt0();     init_wgt0t1();
+   init_wgpt0();    init_wgst0();
+   init_wgst0t1();  init_wgspt0();
+
+   /* Position + colour, with fog. */
+   init_wgf();      init_wgfs();
+   init_wgft0();    init_wgft0t1();
+   init_wgfpt0();   init_wgfst0();
+   init_wgfst0t1(); init_wgfspt0();
+
+   /* Texture coordinates only. */
+   init_t0();       init_t0t1();
+
+   /* Fog, optionally with texture coordinates. */
+   init_f();
+   init_ft0();      init_ft0t1();
+
+   /* Colour without position, no fog. */
+   init_g();        init_gs();
+   init_gt0();      init_gt0t1();
+   init_gst0();     init_gst0t1();
+
+   /* Colour without position, with fog. */
+   init_gf();       init_gfs();
+   init_gft0();     init_gft0t1();
+   init_gfst0();    init_gfst0t1();
+}
+
+
+
+void mach64PrintSetupFlags( char *msg, GLuint flags )
+{
+   /* Prints msg, the raw flag word and the name of every MACH64_*_BIT set. */
+   const char *xyzw = (flags & MACH64_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & MACH64_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & MACH64_SPEC_BIT) ? " spec," : "";
+   const char *fog  = (flags & MACH64_FOG_BIT)  ? " fog," : "";
+   const char *tex0 = (flags & MACH64_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & MACH64_TEX1_BIT) ? " tex-1," : "";
+   const char *ptex = (flags & MACH64_PTEX_BIT) ? " ptex," : "";
+   fprintf( stderr, "%s: %d %s%s%s%s%s%s%s\n", msg, (int)flags,
+            xyzw, rgba, spec, fog, tex0, tex1, ptex );
+}
+
+
+
+
+void mach64CheckTexSizes( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl;
+
+   /* The current setup functions accept the texcoord sizes: nothing to do. */
+   if (setup_tab[mmesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+   tnl = TNL_CONTEXT(ctx);
+
+   /* Add the projective-texture bit and invalidate stored verts. */
+   mmesa->SetupNewInputs = ~0;
+   mmesa->SetupIndex |= MACH64_PTEX_BIT;
+   /* Leave the Interp/CopyPV hooks alone in fallback or twoside/unfilled mode. */
+   if (mmesa->Fallback || (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)))
+      return;
+   tnl->Driver.Render.Interp = setup_tab[mmesa->SetupIndex].interp;
+   tnl->Driver.Render.CopyPV = setup_tab[mmesa->SetupIndex].copy_pv;
+}
+
+void mach64BuildVertices( GLcontext *ctx,
+			GLuint start,
+			GLuint count,
+			GLuint newinputs )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   GLuint stride = 1<<mmesa->vertex_stride_shift;
+   GLubyte *first = (GLubyte *)mmesa->verts + (start<<mmesa->vertex_stride_shift);
+   GLuint dirty;
+
+   /* Fold in inputs invalidated since the previous build, then reset them. */
+   newinputs |= mmesa->SetupNewInputs;
+   mmesa->SetupNewInputs = 0;
+
+   if (newinputs == 0)
+      return;
+
+   /* New positions: rebuild whole vertices with the current setup functions. */
+   if (newinputs & VERT_BIT_POS) {
+      setup_tab[mmesa->SetupIndex].emit( ctx, start, count, first, stride );
+      return;
+   }
+
+   /* Otherwise collect the setup bits of the attributes that changed.
+    * With MACH64_PTEX_BIT set, every bit is selected so that the mask
+    * below yields the complete current SetupIndex.
+    */
+   if (mmesa->SetupIndex & MACH64_PTEX_BIT) {
+      dirty = ~0;
+   } else {
+      dirty = 0;
+      if (newinputs & VERT_BIT_COLOR0) dirty |= MACH64_RGBA_BIT;
+      if (newinputs & VERT_BIT_COLOR1) dirty |= MACH64_SPEC_BIT;
+      if (newinputs & VERT_BIT_TEX0)   dirty |= MACH64_TEX0_BIT;
+      if (newinputs & VERT_BIT_TEX1)   dirty |= MACH64_TEX1_BIT;
+      if (newinputs & VERT_BIT_FOG)    dirty |= MACH64_FOG_BIT;
+   }
+
+   /* Drop attributes that are not part of the current setup. */
+   dirty &= mmesa->SetupIndex;
+   if (dirty == 0)
+      return;
+
+   setup_tab[dirty].emit( ctx, start, count, first, stride );
+}
+
+void mach64ChooseVertexState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint index;
+
+   /* Position and diffuse colour are always selected; the remaining
+    * bits follow the current GL state. */
+   index = MACH64_XYZW_BIT|MACH64_RGBA_BIT;
+   index |= (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) ? MACH64_SPEC_BIT : 0;
+   index |= ctx->Fog.Enabled ? MACH64_FOG_BIT : 0;
+
+   if (ctx->Texture._EnabledUnits) {
+      index |= MACH64_TEX0_BIT;
+      /* A second coordinate set only when both units are really enabled. */
+      if (ctx->Texture.Unit[0]._ReallyEnabled &&
+          ctx->Texture.Unit[1]._ReallyEnabled)
+         index |= MACH64_TEX1_BIT;
+   }
+
+   mmesa->SetupIndex = index;
+   /* Two-sided lighting and unfilled polygons use the "extras" hooks. */
+   if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[index].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[index].copy_pv;
+   } else {
+      tnl->Driver.Render.Interp = mach64_interp_extras;
+      tnl->Driver.Render.CopyPV = mach64_copy_pv_extras;
+   }
+
+#if 0
+   if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+      mach64PrintSetupFlags( __FUNCTION__, index );
+#endif
+
+   /* On a vertex format change, FLUSH_BATCH runs before the new layout is stored. */
+   if (mmesa->vertex_format != setup_tab[index].vertex_format) {
+      FLUSH_BATCH(mmesa);
+      mmesa->vertex_format = setup_tab[index].vertex_format;
+      mmesa->vertex_size = setup_tab[index].vertex_size;
+      mmesa->vertex_stride_shift = setup_tab[index].vertex_stride_shift;
+   }
+}
+
+
+#if 0 /* Disabled (never compiled): emits vertices into a buffer obtained from mach64AllocDmaLow(). */
+void mach64_emit_contiguous_verts( GLcontext *ctx,
+				 GLuint start,
+				 GLuint count )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint vertex_size = mmesa->vertex_size * 4; /* presumably dwords -> bytes; also passed to emit as its stride - TODO confirm */
+   GLuint *dest = mach64AllocDmaLow( mmesa, (count-start) * vertex_size);
+   setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, vertex_size );
+}
+#endif /* 0 */
+
+
+void mach64InitVB( GLcontext *ctx )
+{
+   static int tables_ready = 0;
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint nr_verts = TNL_CONTEXT(ctx)->vb.Size;
+
+   /* Vertex store sized as vb.Size * 4 * 16 bytes; 32 is handed to ALIGN_MALLOC. */
+   mmesa->verts = (char *)ALIGN_MALLOC(nr_verts * 4 * 16, 32);
+
+   /* Function-static flag: the setup table is filled on the first call only. */
+   if (!tables_ready) {
+      init_setup_tab();
+      tables_ready = 1;
+   }
+}
+
+
+void mach64FreeVB( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   if (!mmesa->verts)
+      return;   /* no vertex store to release */
+   ALIGN_FREE(mmesa->verts);
+   mmesa->verts = 0;   /* cleared so a repeated call takes the early return */
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_vb.h b/src/mesa/drivers/dri/mach64/mach64_vb.h
new file mode 100644
index 0000000000..bcc4759af3
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vb.h
@@ -0,0 +1,77 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_VB_H__
+#define __MACH64_VB_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "mach64_context.h"
+
+/* premultiply texture coordinates by homogeneous coordinate */
+#define MACH64_PREMULT_TEXCOORDS
+
+#define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR |          \
+                               _DD_NEW_TRI_LIGHT_TWOSIDE |             \
+                               _DD_NEW_TRI_UNFILLED |                  \
+                               _NEW_TEXTURE |                          \
+                               _NEW_FOG)
+
+
+extern void mach64CheckTexSizes( GLcontext *ctx );
+extern void mach64ChooseVertexState( GLcontext *ctx );
+/* 'count' is handed to the emit functions as their exclusive end index. */
+extern void mach64BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				   GLuint newinputs );
+/* Debug helper: prints msg, flags and the names of the set bits to stderr. */
+extern void mach64PrintSetupFlags(char *msg, GLuint flags );
+/* Allocate / release the per-context vertex store (mmesa->verts). */
+extern void mach64InitVB( GLcontext *ctx );
+extern void mach64FreeVB( GLcontext *ctx );
+
+#if 0
+extern void mach64_emit_contiguous_verts( GLcontext *ctx,
+					    GLuint start,
+					    GLuint count );
+
+extern void mach64_emit_indexed_verts( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count );
+#endif
+
+extern void mach64_translate_vertex( GLcontext *ctx,
+				       const mach64Vertex *src,
+				       SWvertex *dst );
+
+extern void mach64_print_vertex( GLcontext *ctx, const mach64Vertex *v );
+
+
+#endif /* __MACH64_VB_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_vbtmp.h b/src/mesa/drivers/dri/mach64/mach64_vbtmp.h
new file mode 100644
index 0000000000..590af1b546
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vbtmp.h
@@ -0,0 +1,908 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Modified for mach64 by:
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* Unlike the other templates here, this assumes quite a bit about the
+ * underlying hardware.  Specifically it assumes a d3d-like vertex
+ * format, with a layout more or less constrained to look like the
+ * following:
+ *
+ * union {
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0;
+ *        float u1, v1;
+ *        float u2, v2;
+ *        float u3, v3;
+ *    } v;
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0, q0;
+ *        float u1, v1, q1;
+ *        float u2, v2, q2;
+ *        float u3, v3, q3;
+ *    } pv;
+ *    struct {
+ *        float x, y, z;
+ *        struct { char r, g, b, a; } color;
+ *    } tv;
+ *    float f[16];
+ *    unsigned int ui[16];
+ *    unsigned char ub4[16][4];
+ * }
+ *
+
+ * DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_TEX2:  Emit tex2 u,v coordinates.
+ * DO_TEX3:  Emit tex3 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1,2,3 q coordinates where possible.
+ *
+ * HAVE_RGBA_COLOR: Hardware takes color in rgba order (else bgra).
+ *
+ * HAVE_HW_VIEWPORT:  Hardware performs viewport transform.
+ * HAVE_HW_DIVIDE:  Hardware performs perspective divide.
+ *
+ * HAVE_TINY_VERTICES:  Hardware understands v.tv format.
+ * HAVE_PTEX_VERTICES:  Hardware understands v.pv format.
+ * HAVE_NOTEX_VERTICES:  Hardware understands v.v format with texcount 0.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ *
+ * NOTE: The PTEX vertex format always includes TEX0 and TEX1, even if
+ * only TEX0 is enabled, in order to maintain a vertex size which is
+ * an exact number of quadwords.
+ */
+
+#if (HAVE_HW_VIEWPORT) /* hardware does the viewport transform: store coordinates unchanged */
+#define VIEWPORT_X(dst,x) dst = x
+#define VIEWPORT_Y(dst,y) dst = y
+#define VIEWPORT_Z(dst,z) dst = z
+#else /* software viewport: scale by s[0]/s[5]/s[10], offset by s[12..14]; needs 's' in scope at the use site */
+#define VIEWPORT_X(dst,x) dst = s[0]  * x + s[12]
+#define VIEWPORT_Y(dst,y) dst = s[5]  * y + s[13]
+#define VIEWPORT_Z(dst,z) dst = s[10] * z + s[14]
+#endif
+
+#if (HAVE_HW_DIVIDE && !HAVE_PTEX_VERTICES)
+#error "can't cope with this combination" 
+#endif 
+
+#ifndef LOCALVARS /* optional hook expanded at the top of each generated function; empty by default */
+#define LOCALVARS
+#endif
+
+#ifndef CHECK_HW_DIVIDE /* defaults to 1 when the including file does not define it */
+#define CHECK_HW_DIVIDE 1
+#endif
+
+#if (HAVE_HW_DIVIDE || DO_SPEC || DO_TEX0 || DO_FOG || !HAVE_TINY_VERTICES)
+
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest,
+		       GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*tc0)[4], (*tc1)[4], (*fog)[4];
+   GLfloat (*tc2)[4], (*tc3)[4];
+   GLubyte (*col)[4], (*spec)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, tc3_stride;
+   GLuint tc0_size, tc1_size;
+   GLuint tc2_size, tc3_size;
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   const GLubyte *mask = VB->ClipMask;
+   int i;
+   /* Full-vertex emit: writes the DO_* selected attributes of vertices [start,end) to dest, 'stride' bytes apart. */
+/*     fprintf(stderr, "%s(big) importable %d %d..%d\n",  */
+/*  	   __FUNCTION__, VB->importable_data, start, end); */
+
+   if (HAVE_HW_VIEWPORT && HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {
+      (void) s;
+      coord = VB->ClipPtr->data;
+      coord_stride = VB->ClipPtr->stride;
+   }
+   else {
+      coord = VB->NdcPtr->data;
+      coord_stride = VB->NdcPtr->stride;
+   }
+
+   if (DO_TEX3) {
+      const GLuint t3 = GET_TEXSOURCE(3);
+      tc3 = VB->TexCoordPtr[t3]->data;
+      tc3_stride = VB->TexCoordPtr[t3]->stride;
+      if (DO_PTEX)
+	 tc3_size = VB->TexCoordPtr[t3]->size;
+   }
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX)
+	 tc2_size = VB->TexCoordPtr[t2]->size;
+   }
+
+   if (DO_TEX1) {
+      const GLuint t1 = GET_TEXSOURCE(1);
+      tc1 = VB->TexCoordPtr[t1]->data;
+      tc1_stride = VB->TexCoordPtr[t1]->stride;
+      if (DO_PTEX)
+	 tc1_size = VB->TexCoordPtr[t1]->size;
+   }
+
+   if (DO_TEX0) {
+      const GLuint t0 = GET_TEXSOURCE(0);
+      tc0_stride = VB->TexCoordPtr[t0]->stride;
+      tc0 = VB->TexCoordPtr[t0]->data;
+      if (DO_PTEX) 
+	 tc0_size = VB->TexCoordPtr[t0]->size;
+   }
+
+   if (DO_RGBA) {
+      if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	 IMPORT_FLOAT_COLORS( ctx );
+      col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+      col_stride = VB->ColorPtr[0]->StrideB;
+   }
+
+   if (DO_SPEC) {
+      if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	 IMPORT_FLOAT_SPEC_COLORS( ctx );
+      spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+      spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+   }
+
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      } else {
+	 static GLfloat tmp[4] = {0, 0, 0, 0}; /* zero fog value, re-read for every vertex via stride 0 */
+	 fog = &tmp;
+	 fog_stride = 0;
+      }
+   }
+
+   if (VB->importable_data || (DO_SPEC && !spec_stride) || (DO_FOG && !fog_stride)) {
+      /* Strided path: each source array is walked by its own byte stride
+       * (a zero stride re-reads the same element for every vertex). */
+      if (start) {
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+	 if (DO_TEX0)
+	    tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+	 if (DO_TEX1) 
+	    tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+	 if (DO_TEX2) 
+	    tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+	 if (DO_TEX3) 
+	    tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 + start * tc3_stride);
+	 if (DO_RGBA) 
+	    STRIDE_4UB(col, start * col_stride);
+	 if (DO_SPEC)
+	    STRIDE_4UB(spec, start * spec_stride);
+	 if (DO_FOG)
+	    fog =  (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride);
+	    /*  STRIDE_F(fog, start * fog_stride); */
+      }
+
+      for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+	 if (DO_XYZW) {
+	    if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	       /* unclipped */
+	       VIEWPORT_X(v->v.x, coord[0][0]);
+	       VIEWPORT_Y(v->v.y, coord[0][1]);
+	       VIEWPORT_Z(v->v.z, coord[0][2]);
+	       v->v.w = coord[0][3];
+	    } else {
+	       /* clipped */
+	       v->v.w = 1.0;
+	    }
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf(stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %f\n", 
+		       __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w);
+	    }
+	    coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);
+	 }
+	 if (DO_RGBA) {
+	    if (HAVE_RGBA_COLOR) {
+	       *(GLuint *)&v->v.color = *(GLuint *)&col[0];
+	       STRIDE_4UB(col, col_stride);
+	    } else {
+	       v->v.color.blue  = col[0][2];
+	       v->v.color.green = col[0][1];
+	       v->v.color.red   = col[0][0];
+	       v->v.color.alpha = col[0][3];
+	       STRIDE_4UB(col, col_stride);
+	    }
+	 }
+	 if (DO_SPEC) {
+	    v->v.specular.red = spec[0][0];
+	    v->v.specular.green = spec[0][1];
+	    v->v.specular.blue = spec[0][2];
+	    STRIDE_4UB(spec, spec_stride);
+	 }
+	 if (DO_FOG) {
+	    v->v.specular.alpha = fog[0][0] * 255.0;
+	    /*  STRIDE_F(fog, fog_stride); */
+	    fog =  (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+	 }
+	 if (DO_TEX0) {
+	    v->v.u0 = tc0[0][0];
+	    v->v.v0 = tc0[0][1];
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf(stderr, "%s: vert (importable) %d: u0: %.2f, v0: %.2f, w: %f\n", 
+		       __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w);
+	    }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    v->v.u0 *= v->v.w;
+	    v->v.v0 *= v->v.w;
+#endif
+	    if (DO_PTEX) {
+	       if (HAVE_PTEX_VERTICES) {
+		  if (tc0_size == 4) 
+		     v->pv.q0 = tc0[0][3];
+		  else
+		     v->pv.q0 = 1.0;
+	       } 
+	       else if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+		  v->v.w *= tc0[0][3];
+#else
+		  float rhw = 1.0 / tc0[0][3];
+		  v->v.w *= tc0[0][3];
+		  v->v.u0 *= rhw;
+		  v->v.v0 *= rhw;
+#endif
+	       } 
+	    } 
+	    tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (DO_TEX1) {
+	    if (DO_PTEX) {
+	       v->pv.u1 = tc1[0][0];
+	       v->pv.v1 = tc1[0][1];
+	       if (tc1_size == 4) 
+		  v->pv.q1 = tc1[0][3];
+	       else
+		  v->pv.q1 = 1.0;
+	    } 
+	    else {
+	       v->v.u1 = tc1[0][0];
+	       v->v.v1 = tc1[0][1];
+	    }
+#ifdef MACH64_PREMULT_TEXCOORDS /* NOTE(review): scales v.u1/v.v1 even when the DO_PTEX branch above wrote pv.u1/pv.v1 - confirm the two layouts alias */
+	    v->v.u1 *= v->v.w;
+	    v->v.v1 *= v->v.w;
+#endif
+	    tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 } 
+	 else if (DO_PTEX) {
+	    *(GLuint *)&v->pv.q1 = 0;	/* avoid culling on radeon */
+	 }
+	 if (DO_TEX2) {
+	    if (DO_PTEX) {
+	       v->pv.u2 = tc2[0][0];
+	       v->pv.v2 = tc2[0][1];
+	       if (tc2_size == 4) 
+		  v->pv.q2 = tc2[0][3];
+	       else
+		  v->pv.q2 = 1.0;
+	    } 
+	    else {
+	       v->v.u2 = tc2[0][0];
+	       v->v.v2 = tc2[0][1];
+	    }
+	    tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 +  tc2_stride);
+	 } 
+	 if (DO_TEX3) {
+	    if (DO_PTEX) {
+	       v->pv.u3 = tc3[0][0];
+	       v->pv.v3 = tc3[0][1];
+	       if (tc3_size == 4) 
+		  v->pv.q3 = tc3[0][3];
+	       else
+		  v->pv.q3 = 1.0;
+	    } 
+	    else {
+	       v->v.u3 = tc3[0][0];
+	       v->v.v3 = tc3[0][1];
+	    }
+	    tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 +  tc3_stride);
+	 } 
+      }
+   }
+   else { /* direct path: source arrays are indexed by vertex number i; TEX2/TEX3 are not handled here */
+      for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+	 if (DO_XYZW) {
+	    if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	       /* unclipped */
+	       VIEWPORT_X(v->v.x, coord[i][0]);
+	       VIEWPORT_Y(v->v.y, coord[i][1]);
+	       VIEWPORT_Z(v->v.z, coord[i][2]);
+	       v->v.w = coord[i][3];
+	    } else {
+	       /* clipped */
+	       v->v.w = 1.0;
+	    }
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf(stderr, "%s: vert %d: %.2f %.2f %.2f %f\n", 
+		       __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w);
+	    }
+	 }
+	 if (DO_RGBA) {
+	    if (HAVE_RGBA_COLOR) {
+	       *(GLuint *)&v->v.color = *(GLuint *)&col[i];
+	    }
+	    else {
+	       v->v.color.blue  = col[i][2];
+	       v->v.color.green = col[i][1];
+	       v->v.color.red   = col[i][0];
+	       v->v.color.alpha = col[i][3];
+	    }
+	 }
+	 if (DO_SPEC) {
+	    v->v.specular.red   = spec[i][0];
+	    v->v.specular.green = spec[i][1];
+	    v->v.specular.blue  = spec[i][2];
+	 }
+	 if (DO_FOG) {
+	    v->v.specular.alpha = fog[i][0] * 255.0;
+	 }
+	 if (DO_TEX0) {
+	    v->v.u0 = tc0[i][0];
+	    v->v.v0 = tc0[i][1];
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	       fprintf(stderr, "%s: vert %d: u0: %.2f, v0: %.2f, w: %f\n", 
+		       __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w);
+	    }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    v->v.u0 *= v->v.w;
+	    v->v.v0 *= v->v.w;
+#endif
+	    if (DO_PTEX) {
+	       if (HAVE_PTEX_VERTICES) {
+		  if (tc0_size == 4) 
+		     v->pv.q0 = tc0[i][3];
+		  else
+		     v->pv.q0 = 1.0;
+
+		  v->pv.q1 = 0;	/* radeon */
+	       } 
+	       else if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+		  v->v.w *= tc0[i][3];
+#else
+		  float rhw = 1.0 / tc0[i][3];
+		  v->v.w *= tc0[i][3];
+		  v->v.u0 *= rhw;
+		  v->v.v0 *= rhw;
+#endif
+	       } 
+	    } 
+	 }
+	 if (DO_TEX1) {
+	    if (DO_PTEX) {
+	       v->pv.u1 = tc1[i][0];
+	       v->pv.v1 = tc1[i][1];
+	       if (tc1_size == 4) 
+		  v->pv.q1 = tc1[i][3];
+	       else
+		  v->pv.q1 = 1.0;
+	    } 
+	    else {
+	       v->v.u1 = tc1[i][0];
+	       v->v.v1 = tc1[i][1];
+	    }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    v->v.u1 *= v->v.w;
+	    v->v.v1 *= v->v.w;
+#endif
+	 }
+      }
+   }
+}
+#else
+#if DO_XYZW
+
+#if HAVE_HW_DIVIDE
+#error "cannot use tiny vertices with hw perspective divide"
+#endif
+
+/* Tiny-vertex emit: packs x, y, z plus one colour dword into 4 dwords per
+ * vertex for vertices [start,end); there is no 'w' and colour is dword 3.
+ * 'stride' is a byte count, as in the other emit variants, and must match
+ * the 16 bytes this function advances per vertex.
+ */
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte (*col)[4];
+   GLuint col_stride;
+   GLfloat (*coord)[4] = VB->NdcPtr->data;
+   GLuint coord_stride = VB->NdcPtr->stride;
+   GLfloat *v = (GLfloat *)dest;
+   const GLubyte *mask = VB->ClipMask;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   int i;
+
+   (void) s;
+
+   /* The loops below step v by 4 floats, so the byte stride must be 16. */
+   ASSERT(stride == 4 * sizeof(GLfloat));
+
+   if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+      IMPORT_FLOAT_COLORS( ctx );
+
+   col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+   col_stride = VB->ColorPtr[0]->StrideB;
+   ASSERT(VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE);
+
+   if (VB->importable_data) {
+      /* Source arrays may have nonstandard strides: step them by hand. */
+      if (start) {
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+	 STRIDE_4UB(col, start * col_stride);
+      }
+      for (i=start; i < end; i++, v+=4) {
+	 if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	    VIEWPORT_X(v[0], coord[0][0]);
+	    VIEWPORT_Y(v[1], coord[0][1]);
+	    VIEWPORT_Z(v[2], coord[0][2]);
+	 }
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);
+	 if (DO_RGBA) {
+	    if (HAVE_RGBA_COLOR) {
+	       *(GLuint *)&v[3] = *(GLuint *)col;
+	    }
+	    else {
+	       GLubyte *b = (GLubyte *)&v[3];
+	       b[0] = col[0][2];
+	       b[1] = col[0][1];
+	       b[2] = col[0][0];
+	       b[3] = col[0][3];
+	    }
+	    STRIDE_4UB( col, col_stride );
+	 }
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "vert (importable) %d: %.2f %.2f %.2f %x\n",
+		    i, v[0], v[1], v[2], *(int *)&v[3]);
+	 }
+      }
+   }
+   else {
+      for (i=start; i < end; i++, v+=4) {
+	 if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	    VIEWPORT_X(v[0], coord[i][0]);
+	    VIEWPORT_Y(v[1], coord[i][1]);
+	    VIEWPORT_Z(v[2], coord[i][2]);
+	 }
+	 if (DO_RGBA) {
+	    if (HAVE_RGBA_COLOR) {
+	       *(GLuint *)&v[3] = *(GLuint *)&col[i];
+	    }
+	    else {
+	       GLubyte *b = (GLubyte *)&v[3];
+	       b[0] = col[i][2];
+	       b[1] = col[i][1];
+	       b[2] = col[i][0];
+	       b[3] = col[i][3];
+	    }
+	 }
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "vert %d: %.2f %.2f %.2f %x\n",
+		    i, v[0], v[1], v[2], *(int *)&v[3]);
+	 }
+      }
+   }
+}
+#else
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat *v = (GLfloat *)dest;
+   GLubyte (*col)[4];
+   GLuint col_stride;
+   int i;
+
+   /* Colour-only emit: writes just the packed colour dword of vertices
+    * [start,end) stored at dest, 'stride' bytes apart.
+    */
+   if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+      IMPORT_FLOAT_COLORS( ctx );
+
+   col_stride = VB->ColorPtr[0]->StrideB;
+   col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+   if (start)
+      STRIDE_4UB(col, col_stride * start);
+
+   /* Colour is dword 3 of a tiny vertex and dword 4 otherwise. */
+   v += (GET_VERTEX_FORMAT() == TINY_VERTEX_FORMAT) ? 3 : 4;
+
+   for (i = start; i < end; i++) {
+      if (HAVE_RGBA_COLOR) {
+         *(GLuint *)v = *(GLuint *)col[0];
+      }
+      else {
+         /* Swap the first and third colour bytes while copying. */
+         GLubyte *bytes = (GLubyte *)v;
+         bytes[0] = col[0][2];
+         bytes[1] = col[0][1];
+         bytes[2] = col[0][0];
+         bytes[3] = col[0][3];
+      }
+      STRIDE_4UB( col, col_stride );
+      STRIDE_F( v, stride );
+   }
+}
+#endif /* emit */
+#endif /* emit */
+
+#if (DO_XYZW) && (DO_RGBA)
+
+
+#if (HAVE_PTEX_VERTICES)
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Point each 'missing' lower texcoord array at the next unit's array,
+    * highest unit first. */
+   if (DO_TEX3 && VB->TexCoordPtr[2] == 0) VB->TexCoordPtr[2] = VB->TexCoordPtr[3];
+   if (DO_TEX2 && VB->TexCoordPtr[1] == 0) VB->TexCoordPtr[1] = VB->TexCoordPtr[2];
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0) VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+
+   /* Projective variants accept every texcoord size. */
+   if (DO_PTEX)
+      return GL_TRUE;
+
+   /* Any enabled unit with 4-component texcoords fails the check. */
+   if (DO_TEX3 && VB->TexCoordPtr[GET_TEXSOURCE(3)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX2 && VB->TexCoordPtr[GET_TEXSOURCE(2)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+#else
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Point each 'missing' lower texcoord array at the next unit's array,
+    * highest unit first. */
+   if (DO_TEX3 && VB->TexCoordPtr[2] == 0) VB->TexCoordPtr[2] = VB->TexCoordPtr[3];
+   if (DO_TEX2 && VB->TexCoordPtr[1] == 0) VB->TexCoordPtr[1] = VB->TexCoordPtr[2];
+   if (DO_TEX1 && VB->TexCoordPtr[0] == 0) VB->TexCoordPtr[0] = VB->TexCoordPtr[1];
+
+   /* Projective variants accept every texcoord size. */
+   if (DO_PTEX)
+      return GL_TRUE;
+
+   /* No hardware support for projective texture; it can be faked for
+    * TEX0 only, so a 4-component texcoord on any higher unit falls back.
+    */
+   if (DO_TEX3 && VB->TexCoordPtr[GET_TEXSOURCE(3)]->size == 4) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+   if (DO_TEX2 && VB->TexCoordPtr[GET_TEXSOURCE(2)]->size == 4) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+   if (DO_TEX1 && VB->TexCoordPtr[GET_TEXSOURCE(1)]->size == 4) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+   /* Projective TEX0: falls back as well when other units are in use. */
+   if (!(DO_TEX0 && VB->TexCoordPtr[GET_TEXSOURCE(0)]->size == 4))
+      return GL_TRUE;
+   if (DO_TEX1 || DO_TEX2 || DO_TEX3) {
+      PTEX_FALLBACK();
+   }
+   return GL_FALSE;
+}
+#endif /* ptex */
+
+
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,
+			 GLuint edst, GLuint eout, GLuint ein,
+			 GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint shift = GET_VERTEX_STRIDE_SHIFT();
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+
+   VERTEX *dst = (VERTEX *)(ddverts + (edst << shift));
+   VERTEX *in  = (VERTEX *)(ddverts + (ein << shift));
+   VERTEX *out = (VERTEX *)(ddverts + (eout << shift));
+
+   (void)s;
+
+   if (HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {
+      VIEWPORT_X( dst->v.x, dstclip[0] );
+      VIEWPORT_Y( dst->v.y, dstclip[1] );
+      VIEWPORT_Z( dst->v.z, dstclip[2] );
+      w = dstclip[3];
+   }
+   else {
+      w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);
+      VIEWPORT_X( dst->v.x, dstclip[0] * w );
+      VIEWPORT_Y( dst->v.y, dstclip[1] * w );
+      VIEWPORT_Z( dst->v.z, dstclip[2] * w );
+   }
+
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %f\n",
+	       __FUNCTION__,
+	       dst->v.x,
+	       dst->v.y,
+	       dst->v.z,
+	       w );
+   }
+
+   if ((HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) || 
+       DO_FOG || DO_SPEC || DO_TEX0 || DO_TEX1 ||
+       DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES) {
+
+      dst->v.w = w;
+
+      INTERP_UB( t, dst->ub4[4][0], out->ub4[4][0], in->ub4[4][0] );
+      INTERP_UB( t, dst->ub4[4][1], out->ub4[4][1], in->ub4[4][1] );
+      INTERP_UB( t, dst->ub4[4][2], out->ub4[4][2], in->ub4[4][2] );
+      INTERP_UB( t, dst->ub4[4][3], out->ub4[4][3], in->ub4[4][3] );
+
+      if (DO_SPEC) {
+	 INTERP_UB( t, dst->ub4[5][0], out->ub4[5][0], in->ub4[5][0] );
+	 INTERP_UB( t, dst->ub4[5][1], out->ub4[5][1], in->ub4[5][1] );
+	 INTERP_UB( t, dst->ub4[5][2], out->ub4[5][2], in->ub4[5][2] );
+      }
+      if (DO_FOG) {
+	 INTERP_UB( t, dst->ub4[5][3], out->ub4[5][3], in->ub4[5][3] );
+      }
+      if (DO_TEX0) {
+	 if (DO_PTEX) {
+	    if (HAVE_PTEX_VERTICES) {
+	       INTERP_F( t, dst->pv.u0, out->pv.u0, in->pv.u0 );
+	       INTERP_F( t, dst->pv.v0, out->pv.v0, in->pv.v0 );
+	       INTERP_F( t, dst->pv.q0, out->pv.q0, in->pv.q0 );
+	    } else {
+	       GLfloat wout = VB->NdcPtr->data[eout][3];
+	       GLfloat win = VB->NdcPtr->data[ein][3];
+	       GLfloat qout = out->pv.w / wout;
+	       GLfloat qin = in->pv.w / win;
+	       GLfloat qdst, rqdst;
+
+	       ASSERT( !HAVE_HW_DIVIDE );
+
+	       INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin );
+	       INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin );
+	       INTERP_F( t, qdst, qout, qin );
+
+	       rqdst = 1.0 / qdst;
+	       dst->v.u0 *= rqdst;
+	       dst->v.v0 *= rqdst;
+	       dst->v.w *= rqdst;
+	    }
+	 }
+	 else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;
+	    GLfloat qin = 1 / in->v.w;
+	    
+	    INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin);
+	    INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin);
+
+	    dst->v.u0 *= w;
+	    dst->v.v0 *= w;
+#else
+	    INTERP_F( t, dst->v.u0, out->v.u0, in->v.u0 );
+	    INTERP_F( t, dst->v.v0, out->v.v0, in->v.v0 );
+#endif
+	 }
+      }
+      if (DO_TEX1) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u1, out->pv.u1, in->pv.u1 );
+	    INTERP_F( t, dst->pv.v1, out->pv.v1, in->pv.v1 );
+	    INTERP_F( t, dst->pv.q1, out->pv.q1, in->pv.q1 );
+	 } else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;
+	    GLfloat qin = 1 / in->v.w;
+	    
+	    INTERP_F( t, dst->v.u1, out->v.u1 * qout, in->v.u1 * qin );
+	    INTERP_F( t, dst->v.v1, out->v.v1 * qout, in->v.v1 * qin );
+
+	    dst->v.u1 *= w;
+	    dst->v.v1 *= w;
+#else
+	    INTERP_F( t, dst->v.u1, out->v.u1, in->v.u1 );
+	    INTERP_F( t, dst->v.v1, out->v.v1, in->v.v1 );
+#endif
+	 }
+      }
+      else if (DO_PTEX) {
+	 dst->pv.q0 = 0.0;	/* must be a valid float on radeon */
+      }
+      if (DO_TEX2) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u2, out->pv.u2, in->pv.u2 );
+	    INTERP_F( t, dst->pv.v2, out->pv.v2, in->pv.v2 );
+	    INTERP_F( t, dst->pv.q2, out->pv.q2, in->pv.q2 );
+	 } else {
+	    INTERP_F( t, dst->v.u2, out->v.u2, in->v.u2 );
+	    INTERP_F( t, dst->v.v2, out->v.v2, in->v.v2 );
+	 }
+      }
+      if (DO_TEX3) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u3, out->pv.u3, in->pv.u3 );
+	    INTERP_F( t, dst->pv.v3, out->pv.v3, in->pv.v3 );
+	    INTERP_F( t, dst->pv.q3, out->pv.q3, in->pv.q3 );
+	 } else {
+	    INTERP_F( t, dst->v.u3, out->v.u3, in->v.u3 );
+	    INTERP_F( t, dst->v.v3, out->v.v3, in->v.v3 );
+	 }
+      }
+   } else {
+      /* 4-dword vertex.  Color is in v[3] and there is no oow coordinate.
+       */
+      INTERP_UB( t, dst->ub4[3][0], out->ub4[3][0], in->ub4[3][0] );
+      INTERP_UB( t, dst->ub4[3][1], out->ub4[3][1], in->ub4[3][1] );
+      INTERP_UB( t, dst->ub4[3][2], out->ub4[3][2], in->ub4[3][2] );
+      INTERP_UB( t, dst->ub4[3][3], out->ub4[3][3], in->ub4[3][3] );
+   }
+}
+
+#endif /* rgba && xyzw */
+
+/* Fill setup_tab[IND]: callbacks and vertex layout for this DO_xxx/HAVE_xxx combo. */
+static void TAG(init)( void )
+{
+   setup_tab[IND].emit = TAG(emit);  /* installed for every instantiation */
+
+#if (DO_XYZW && DO_RGBA)  /* these two are only set for rgba && xyzw variants */
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+   setup_tab[IND].interp = TAG(interp);
+#endif
+
+   if (DO_SPEC)  /* helper names match the ub4[4]/ub4[5]/ub4[3] slots used by interp */
+      setup_tab[IND].copy_pv = copy_pv_rgba4_spec5;
+   else if (HAVE_HW_DIVIDE || DO_SPEC || DO_FOG || DO_TEX0 || DO_TEX1 ||
+	    DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES)  /* DO_SPEC is always 0 here */
+      setup_tab[IND].copy_pv = copy_pv_rgba4;
+   else
+      setup_tab[IND].copy_pv = copy_pv_rgba3;  /* remaining case: the tiny vertex */
+
+   if (DO_TEX3) {  /* highest enabled texture unit decides the layout */
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 18;  /* multiplied by 4 in the assert below, i.e. dwords */
+	 setup_tab[IND].vertex_stride_shift = 7;  /* stride is 1 << shift (presumably bytes) */
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 14;
+	 setup_tab[IND].vertex_stride_shift = 6;
+      }
+   }
+   else if (DO_TEX2) {
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;  /* same values as projective TEX3 */
+	 setup_tab[IND].vertex_size = 18;
+	 setup_tab[IND].vertex_stride_shift = 7;
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX2_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+	 setup_tab[IND].vertex_stride_shift = 6;
+      }
+   }
+   else if (DO_TEX1) {
+      if (DO_PTEX) {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+	 setup_tab[IND].vertex_stride_shift = 6;
+      }
+      else {
+	 setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 10;
+	 setup_tab[IND].vertex_stride_shift = 6;
+      }
+   }
+   else if (DO_TEX0) {
+      if (DO_PTEX && HAVE_PTEX_VERTICES) {  /* no assert here: without ptex vertices use TEX0 */
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+	 setup_tab[IND].vertex_stride_shift = 6;
+      } else {
+	 setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 8;
+	 setup_tab[IND].vertex_stride_shift = 5;
+      }
+   }
+   else if (!HAVE_HW_DIVIDE && !DO_SPEC && !DO_FOG && HAVE_TINY_VERTICES) {  /* untextured */
+      setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 4;
+      setup_tab[IND].vertex_stride_shift = 4;
+   } else if (HAVE_NOTEX_VERTICES) {
+      setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 6;
+      setup_tab[IND].vertex_stride_shift = 5;
+   } else {  /* no NOTEX format available: use the TEX0 layout */
+      setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 8;
+      setup_tab[IND].vertex_stride_shift = 5;
+   }
+
+   assert(setup_tab[IND].vertex_size * 4 <=  /* the vertex must fit inside one stride */
+          1 << setup_tab[IND].vertex_stride_shift);
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/server/mach64_common.h b/src/mesa/drivers/dri/mach64/server/mach64_common.h
new file mode 100644
index 0000000000..65ec25afb6
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/server/mach64_common.h
@@ -0,0 +1,131 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/* mach64_common.h -- common header definitions for Rage Pro 2D/3D/DRM suite
+ * Created: Sun Dec 03 11:34:16 2000 by gareth@valinux.com
+ *
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *	Gareth Hughes <gareth@valinux.com>
+ *      Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_COMMON_H__
+#define __MACH64_COMMON_H__ 1
+
+/* WARNING: If you change any of these defines, make sure to change
+ * the kernel include file as well (mach64_drm.h)
+ */
+
+/* Driver specific DRM command indices (consecutive, 0x00 through 0x08).
+ * NOTE: these are not OS specific, but they are driver specific
+ * The drmMach64* structs below presumably carry the arguments -- TODO confirm. */
+#define DRM_MACH64_INIT           0x00
+#define DRM_MACH64_IDLE           0x01
+#define DRM_MACH64_RESET          0x02
+#define DRM_MACH64_SWAP           0x03
+#define DRM_MACH64_CLEAR          0x04
+#define DRM_MACH64_VERTEX         0x05
+#define DRM_MACH64_BLIT           0x06
+#define DRM_MACH64_FLUSH          0x07
+#define DRM_MACH64_GETPARAM       0x08
+
+/* Buffer flags for clears.  Each is a distinct bit, so they can be OR'ed
+ * together (presumably into drmMach64Clear.flags -- TODO confirm). */
+#define MACH64_FRONT	          0x1
+#define MACH64_BACK	          0x2
+#define MACH64_DEPTH	          0x4
+
+/* Primitive types for vertex buffers.  Values run 0..9 and coincide with
+ * the OpenGL GL_POINTS..GL_POLYGON enumerants (same order). */
+#define MACH64_PRIM_POINTS		0x00000000
+#define MACH64_PRIM_LINES		0x00000001
+#define MACH64_PRIM_LINE_LOOP		0x00000002
+#define MACH64_PRIM_LINE_STRIP		0x00000003
+#define MACH64_PRIM_TRIANGLES		0x00000004
+#define MACH64_PRIM_TRIANGLE_STRIP	0x00000005
+#define MACH64_PRIM_TRIANGLE_FAN	0x00000006
+#define MACH64_PRIM_QUADS		0x00000007
+#define MACH64_PRIM_QUAD_STRIP		0x00000008
+#define MACH64_PRIM_POLYGON		0x00000009
+
+
+typedef enum _drmMach64DMAMode {  /* type of drmMach64Init.dma_mode */
+   MACH64_MODE_DMA_ASYNC,  /* = 0 (implicit enumerator value) */
+   MACH64_MODE_DMA_SYNC,  /* = 1 */
+   MACH64_MODE_MMIO  /* = 2; presumably register writes instead of DMA -- TODO confirm */
+} drmMach64DMAMode;
+
+typedef struct {  /* init/cleanup request; presumably the DRM_MACH64_INIT argument */
+   enum {
+      DRM_MACH64_INIT_DMA    = 0x01,
+      DRM_MACH64_CLEANUP_DMA = 0x02
+   } func;  /* selects which of the two operations is requested */
+   unsigned long sarea_priv_offset;
+   int is_pci;  /* presumably non-zero for PCI (non-AGP) operation -- TODO confirm */
+   drmMach64DMAMode dma_mode;  /* one of MACH64_MODE_* */
+
+   unsigned int fb_bpp;
+   unsigned int front_offset, front_pitch;  /* front color buffer */
+   unsigned int back_offset, back_pitch;  /* back color buffer */
+
+   unsigned int depth_bpp;
+   unsigned int depth_offset, depth_pitch;  /* depth buffer */
+
+   unsigned long fb_offset;  /* NOTE(review): unsigned long size varies between 32/64-bit ABIs */
+   unsigned long mmio_offset;
+   unsigned long ring_offset;
+   unsigned long buffers_offset;
+   unsigned long agp_textures_offset;
+} drmMach64Init;
+
+typedef struct {  /* clear request; presumably the DRM_MACH64_CLEAR argument */
+   unsigned int flags;  /* presumably MACH64_FRONT/BACK/DEPTH bits -- TODO confirm */
+   int x, y, w, h;  /* presumably the rectangle to clear -- TODO confirm units */
+   unsigned int clear_color;
+   unsigned int clear_depth;
+} drmMach64Clear;
+
+typedef struct {  /* vertex buffer submission; presumably the DRM_MACH64_VERTEX argument */
+   int prim;  /* presumably one of MACH64_PRIM_* -- TODO confirm */
+   void *buf;			/* Address of vertex buffer */
+   unsigned long used;		/* Number of bytes in buffer */
+   int discard;			/* Client finished with buffer? */
+} drmMach64Vertex;
+
+typedef struct {  /* blit request; presumably the DRM_MACH64_BLIT argument */
+   int idx;  /* presumably a DMA buffer index -- TODO confirm */
+   int pitch;
+   int offset;
+   int format;
+   unsigned short x, y;  /* presumably the destination origin -- TODO confirm */
+   unsigned short width, height;
+} drmMach64Blit;
+
+typedef struct {  /* parameter query; presumably the DRM_MACH64_GETPARAM argument */
+   enum {
+      MACH64_PARAM_FRAMES_QUEUED = 0x01,
+      MACH64_PARAM_IRQ_NR = 0x02
+   } param;  /* which parameter is being asked for */
+   int *value;  /* presumably where the result is written -- TODO confirm */
+} drmMach64GetParam;
+
+#endif /* __MACH64_COMMON_H__ */
diff --git a/src/mesa/drivers/dri/mach64/server/mach64_dri.h b/src/mesa/drivers/dri/mach64/server/mach64_dri.h
new file mode 100644
index 0000000000..f89001917c
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/server/mach64_dri.h
@@ -0,0 +1,127 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_DRI_H__
+#define __MACH64_DRI_H__ 1
+
+#include "xf86drm.h"
+#include "mach64_common.h"
+
+typedef struct {  /* handles, sizes and maps; presumably X-server-side DRI state */
+   drmHandle fbHandle;  /* presumably the framebuffer map handle -- TODO confirm */
+
+   drmHandle regsHandle;
+   drmSize regsSize;
+
+   int IsPCI;
+
+   drmHandle agpHandle;            /* Handle from drmAgpAlloc */
+   unsigned long agpOffset;
+   drmSize agpSize;
+   int agpMode;
+
+   /* DMA descriptor ring */
+   unsigned long     ringStart;        /* Offset into AGP space */
+   drmHandle         ringHandle;       /* Handle from drmAddMap */
+   drmSize           ringMapSize;      /* Size of map */
+   int               ringSize;         /* Size of ring (in kB) */
+   drmAddress        ringMap;          /* Map */
+
+   /* vertex buffer data */
+   unsigned long     bufferStart;      /* Offset into AGP space */
+   drmHandle         bufferHandle;     /* Handle from drmAddMap */
+   drmSize           bufferMapSize;    /* Size of map */
+   int               bufferSize;       /* Size of buffers (in MB) */
+   drmAddress        bufferMap;        /* Map */
+
+   drmBufMapPtr      drmBuffers;       /* Buffer map */
+   int               numBuffers;       /* Number of buffers */
+
+   /* AGP Texture data */
+   unsigned long     agpTexStart;      /* Offset into AGP space */
+   drmHandle         agpTexHandle;     /* Handle from drmAddMap */
+   drmSize           agpTexMapSize;    /* Size of map */
+   int               agpTexSize;       /* Size of AGP tex space (in MB) */
+   drmAddress        agpTexMap;        /* Map */
+   int               log2AGPTexGran;  /* presumably log2 of the AGP texture granularity */
+
+   int fbX;  /* fbX..depthY: presumably buffer origins -- TODO confirm units */
+   int fbY;
+   int backX;
+   int backY;
+   int depthX;
+   int depthY;
+
+   int frontOffset;  /* offset/pitch pairs for the front, back and depth buffers */
+   int frontPitch;
+   int backOffset;
+   int backPitch;
+   int depthOffset;
+   int depthPitch;
+
+   int textureOffset;
+   int textureSize;
+   int logTextureGranularity;
+} ATIDRIServerInfoRec, *ATIDRIServerInfoPtr;
+
+typedef struct {  /* screen/buffer description; presumably what DRI clients receive */
+   int chipset;
+   int width;
+   int height;
+   int mem;
+   int cpp;  /* presumably bytes ("chars") per pixel -- TODO confirm */
+
+   int IsPCI;
+   int AGPMode;
+
+   unsigned int frontOffset;  /* front buffer */
+   unsigned int frontPitch;
+
+   unsigned int backOffset;  /* back buffer */
+   unsigned int backPitch;
+
+   unsigned int depthOffset;  /* depth buffer */
+   unsigned int depthPitch;
+
+   unsigned int textureOffset;  /* texture area; presumably in card memory */
+   unsigned int textureSize;
+   int logTextureGranularity;
+
+   drmHandle regs;  /* register map handle and size */
+   drmSize regsSize;
+
+   drmHandle agp;  /* AGP map handle and size */
+   drmSize agpSize;
+   unsigned int agpTextureOffset;
+   unsigned int agpTextureSize;
+   int logAgpTextureGranularity;
+} ATIDRIRec, *ATIDRIPtr;
+
+#endif /* __MACH64_DRI_H__ */
diff --git a/src/mesa/drivers/dri/mach64/server/mach64_sarea.h b/src/mesa/drivers/dri/mach64/server/mach64_sarea.h
new file mode 100644
index 0000000000..31323fa68e
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/server/mach64_sarea.h
@@ -0,0 +1,160 @@
+/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_SAREA_H__
+#define __MACH64_SAREA_H__ 1
+
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the kernel file (mach64_drm.h)
+ */
+#ifndef __MACH64_SAREA_DEFINES__
+#define __MACH64_SAREA_DEFINES__ 1
+
+/* What needs to be changed for the current vertex buffer?
+ * GH: We're going to be pedantic about this.  We want the card to do as
+ * little as possible, so let's avoid having it fetch a whole bunch of
+ * register values that don't change all that often, if at all.
+ * Each flag is a single bit; CONTEXT and ALL below are OR-combinations. */
+#define MACH64_UPLOAD_DST_OFF_PITCH	0x0001
+#define MACH64_UPLOAD_Z_OFF_PITCH	0x0002
+#define MACH64_UPLOAD_Z_ALPHA_CNTL	0x0004
+#define MACH64_UPLOAD_SCALE_3D_CNTL	0x0008
+#define MACH64_UPLOAD_DP_FOG_CLR	0x0010
+#define MACH64_UPLOAD_DP_WRITE_MASK	0x0020
+#define MACH64_UPLOAD_DP_PIX_WIDTH	0x0040
+#define MACH64_UPLOAD_SETUP_CNTL	0x0080
+#define MACH64_UPLOAD_MISC		0x0100
+#define MACH64_UPLOAD_TEXTURE		0x0200
+#define MACH64_UPLOAD_TEX0IMAGE		0x0400
+#define MACH64_UPLOAD_TEX1IMAGE		0x0800
+#define MACH64_UPLOAD_CLIPRECTS		0x1000 /* handled client-side */
+#define MACH64_UPLOAD_CONTEXT		0x00ff /* low eight flags: DST_OFF_PITCH..SETUP_CNTL */
+#define MACH64_UPLOAD_ALL		0x1fff /* all thirteen flags above */
+
+/* DMA buffer size (16384; presumably bytes -- TODO confirm)
+ */
+#define MACH64_BUFFER_SIZE		16384
+
+/* Max number of swaps allowed on the ring
+ * before the client must wait
+ */
+#define MACH64_MAX_QUEUED_FRAMES        3
+
+/* Byte offsets for host blit buffer data
+ */
+#define MACH64_HOSTDATA_BLIT_OFFSET	104
+
+/* Keep these small for testing.  Sizes the boxes[] array in ATISAREAPrivRec.
+ */
+#define MACH64_NR_SAREA_CLIPRECTS	8
+
+
+#define MACH64_CARD_HEAP		0
+#define MACH64_AGP_HEAP			1
+#define MACH64_NR_TEX_HEAPS		2  /* the CARD and AGP heaps above */
+#define MACH64_NR_TEX_REGIONS		64
+#define MACH64_LOG_TEX_GRANULARITY	16  /* presumably log2 of the region size -- TODO confirm */
+
+#define MACH64_TEX_MAXLEVELS		1
+
+#define MACH64_NR_CONTEXT_REGS		15  /* equals the context-state field count of mach64_context_regs_t */
+#define MACH64_NR_TEXTURE_REGS		4  /* equals its texture-state field count */
+
+#endif /* __MACH64_SAREA_DEFINES__ */
+
+typedef struct {  /* register values; the hex notes look like register offsets -- TODO confirm */
+   /* Context state (15 fields, cf. MACH64_NR_CONTEXT_REGS) */
+   unsigned int dst_off_pitch;		/* 0x500 */
+
+   unsigned int z_off_pitch;		/* 0x548 */ /* ****** */
+   unsigned int z_cntl;			/* 0x54c */
+   unsigned int alpha_tst_cntl;		/* 0x550 */
+
+   unsigned int scale_3d_cntl;		/* 0x5fc */
+
+   unsigned int sc_left_right;		/* 0x6a8 */
+   unsigned int sc_top_bottom;		/* 0x6b4 */
+
+   unsigned int dp_fog_clr;		/* 0x6c4 */
+   unsigned int dp_write_mask;		/* 0x6c8 */
+   unsigned int dp_pix_width;		/* 0x6d0 */
+   unsigned int dp_mix;			/* 0x6d4 */ /* ****** */
+   unsigned int dp_src;			/* 0x6d8 */ /* ****** */
+
+   unsigned int clr_cmp_cntl;		/* 0x708 */ /* ****** */
+   unsigned int gui_traj_cntl;		/* 0x730 */ /* ****** */
+
+   unsigned int setup_cntl;		/* 0x304 */
+
+   /* Texture state (4 fields, cf. MACH64_NR_TEXTURE_REGS) */
+   unsigned int tex_size_pitch;		/* 0x770 */
+   unsigned int tex_cntl;		/* 0x774 */
+   unsigned int secondary_tex_off;	/* 0x778 */
+   unsigned int tex_offset;		/* 0x5c0 */
+} mach64_context_regs_t;
+
+typedef struct {  /* driver-private SAREA contents: state, cliprects, throttle, texture LRU */
+   /* The channel for communication of state information to the kernel
+    * on firing a vertex buffer.
+    */
+   mach64_context_regs_t ContextState;
+   unsigned int dirty;  /* presumably MACH64_UPLOAD_* bits -- TODO confirm */
+   unsigned int vertsize;
+
+   /* The current cliprects, or a subset thereof.
+    */
+   XF86DRIClipRectRec boxes[MACH64_NR_SAREA_CLIPRECTS];
+   unsigned int nbox;  /* presumably the number of valid entries in boxes[] */
+
+   /* Counter for throttling of rendering clients.
+    */
+   unsigned int frames_queued;
+
+   /* Maintain an LRU of contiguous regions of texture space.  If you
+    * think you own a region of texture memory, and it has an age
+    * different to the one you set, then you are mistaken and it has
+    * been stolen by another client.  If global texAge hasn't changed,
+    * there is no need to walk the list.
+    *
+    * These regions can be used as a proxy for the fine-grained texture
+    * information of other clients - by maintaining them in the same
+    * lru which is used to age their own textures, clients have an
+    * approximate lru for the whole of global texture space, and can
+    * make informed decisions as to which areas to kick out.  There is
+    * no need to choose whether to kick out your own texture or someone
+    * else's - simply eject them all in LRU order.
+    * NOTE(review): the extra (+1) entry per heap is presumably the list head.
+    */
+   drmTextureRegion texList[MACH64_NR_TEX_HEAPS][MACH64_NR_TEX_REGIONS+1];
+   unsigned int texAge[MACH64_NR_TEX_HEAPS];  /* one age counter per texture heap */
+
+   int ctxOwner;     /* last context to upload state */
+} ATISAREAPrivRec, *ATISAREAPrivPtr;
+
+#endif /* __MACH64_SAREA_H__ */
author	Dave Airlie <airliedfreedesktop.org>	2004-02-05 22:45:00 +0000
committer	Dave Airlie <airliedfreedesktop.org>	2004-02-05 22:45:00 +0000
commit	0fbeff2fa2e831e45e4dc6014c8f1e6abaa44aa1 (patch)
tree	56dca07fce136806001a6147861a5b666fae46a4
parent	493e6e1e900b3286c90db6dc1686162a9c869bd9 (diff)